diff --git a/.github/actions/allure-report-generate/action.yml b/.github/actions/allure-report-generate/action.yml index d1d09223dbff..d6219c31b4a5 100644 --- a/.github/actions/allure-report-generate/action.yml +++ b/.github/actions/allure-report-generate/action.yml @@ -43,7 +43,8 @@ runs: PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true) if [ "${PR_NUMBER}" != "null" ]; then BRANCH_OR_PR=pr-${PR_NUMBER} - elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ] || [ "${GITHUB_REF_NAME}" = "release-proxy" ]; then + elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ] || \ + [ "${GITHUB_REF_NAME}" = "release-proxy" ] || [ "${GITHUB_REF_NAME}" = "release-compute" ]; then # Shortcut for special branches BRANCH_OR_PR=${GITHUB_REF_NAME} else diff --git a/.github/actions/allure-report-store/action.yml b/.github/actions/allure-report-store/action.yml index 9c376f420ad7..3c83656c8940 100644 --- a/.github/actions/allure-report-store/action.yml +++ b/.github/actions/allure-report-store/action.yml @@ -23,7 +23,8 @@ runs: PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true) if [ "${PR_NUMBER}" != "null" ]; then BRANCH_OR_PR=pr-${PR_NUMBER} - elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ] || [ "${GITHUB_REF_NAME}" = "release-proxy" ]; then + elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ] || \ + [ "${GITHUB_REF_NAME}" = "release-proxy" ] || [ "${GITHUB_REF_NAME}" = "release-compute" ]; then # Shortcut for special branches BRANCH_OR_PR=${GITHUB_REF_NAME} else diff --git a/.github/workflows/_create-release-pr.yml b/.github/workflows/_create-release-pr.yml index cc6994397f8d..3c130c822945 100644 --- a/.github/workflows/_create-release-pr.yml +++ b/.github/workflows/_create-release-pr.yml @@ -21,7 +21,7 @@ defaults: shell: bash -euo pipefail {0} jobs: - create-storage-release-branch: + create-release-branch: runs-on: ubuntu-22.04 permissions: diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index ea8fee80c220..7621d72f64eb 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -249,7 +249,7 @@ jobs: # Post both success and failure to the Slack channel - name: Post to a Slack channel - if: ${{ github.event.schedule }} + if: ${{ github.event.schedule && !cancelled() }} uses: slackapi/slack-github-action@v1 with: channel-id: "C06T9AMNDQQ" # on-call-compute-staging-stream diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 9830c2a0c9b2..cb966f292ee5 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -6,6 +6,7 @@ on: - main - release - release-proxy + - release-compute pull_request: defaults: @@ -70,8 +71,10 @@ jobs: echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT + elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then + echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT else - echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'" + echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'" echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT fi shell: bash @@ -513,7 +516,7 @@ jobs: }) trigger-e2e-tests: - if: ${{ 
!github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' }} + if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }} needs: [ check-permissions, promote-images, tag ] uses: ./.github/workflows/trigger-e2e-tests.yml secrets: inherit @@ -669,7 +672,7 @@ jobs: neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }} - name: Build neon extensions test image - if: matrix.version.pg == 'v16' + if: matrix.version.pg >= 'v16' uses: docker/build-push-action@v6 with: context: . @@ -684,8 +687,7 @@ jobs: pull: true file: compute/compute-node.Dockerfile target: neon-pg-ext-test - cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }} - cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }} + cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }} tags: | neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }} @@ -708,7 +710,7 @@ jobs: push: true pull: true file: compute/compute-node.Dockerfile - cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }} + cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }} cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-tools-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }} tags: | neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }} @@ -744,7 +746,7 @@ jobs: neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64 - name: Create multi-arch neon-test-extensions image - if: matrix.version.pg == 'v16' + if: matrix.version.pg >= 'v16' run: | docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \ -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \ @@ -833,6 +835,7 @@ jobs: fail-fast: false matrix: arch: [ x64, arm64 ] + pg_version: [v16, v17] runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }} @@ -871,7 +874,10 @@ jobs: - name: Verify docker-compose example and test extensions timeout-minutes: 20 - run: env TAG=${{needs.tag.outputs.build-tag}} ./docker-compose/docker_compose_test.sh + env: + TAG: ${{needs.tag.outputs.build-tag}} + TEST_VERSION_ONLY: ${{ matrix.pg_version }} + run: ./docker-compose/docker_compose_test.sh - name: Print logs and clean up if: always() @@ -931,7 +937,7 @@ jobs: neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag 
}} - name: Configure AWS-prod credentials - if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' + if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' uses: aws-actions/configure-aws-credentials@v4 with: aws-region: eu-central-1 @@ -940,12 +946,12 @@ jobs: - name: Login to prod ECR uses: docker/login-action@v3 - if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' + if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' with: registry: 093970136003.dkr.ecr.eu-central-1.amazonaws.com - name: Copy all images to prod ECR - if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' + if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' run: | for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16,v17}; do docker buildx imagetools create -t 093970136003.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }} \ @@ -965,7 +971,7 @@ jobs: tenant_id: ${{ vars.AZURE_TENANT_ID }} push-to-acr-prod: - if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' + if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' needs: [ tag, promote-images ] uses: ./.github/workflows/_push-to-acr.yml with: @@ -1053,7 +1059,7 @@ jobs: deploy: needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ] # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod` - if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy') && !failure() && !cancelled() + if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled() runs-on: [ self-hosted, small ] container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest @@ -1102,13 +1108,15 @@ jobs: -f deployProxyAuthBroker=true \ -f branch=main \ -f dockerTag=${{needs.tag.outputs.build-tag}} + elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then + gh workflow --repo neondatabase/infra run deploy-compute-dev.yml --ref main -f dockerTag=${{needs.tag.outputs.build-tag}} else - echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'" + echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main', 'release', 'release-proxy' or 'release-compute'" exit 1 fi - name: Create git tag - if: github.ref_name == 'release' || github.ref_name == 'release-proxy' + if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' uses: actions/github-script@v7 with: # Retry script for 5XX server errors: https://github.com/actions/github-script#retries diff --git a/.github/workflows/ingest_benchmark.yml b/.github/workflows/ingest_benchmark.yml index 1033dc6489f7..a5810e91a42b 100644 --- a/.github/workflows/ingest_benchmark.yml +++ b/.github/workflows/ingest_benchmark.yml @@ -26,6 +26,7 @@ concurrency: jobs: ingest: strategy: + fail-fast: false # allow other variants to continue even if one fails matrix: target_project: [new_empty_project, large_existing_project] permissions: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 
11f010b6d4f6..f0273b977f0e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,6 +15,10 @@ on: type: boolean description: 'Create Proxy release PR' required: false + create-compute-release-branch: + type: boolean + description: 'Create Compute release PR' + required: false # No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job. permissions: {} @@ -25,20 +29,20 @@ defaults: jobs: create-storage-release-branch: - if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }} + if: ${{ github.event.schedule == '0 6 * * MON' || inputs.create-storage-release-branch }} permissions: contents: write uses: ./.github/workflows/_create-release-pr.yml with: - component-name: 'Storage & Compute' + component-name: 'Storage' release-branch: 'release' secrets: ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }} create-proxy-release-branch: - if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }} + if: ${{ github.event.schedule == '0 6 * * THU' || inputs.create-proxy-release-branch }} permissions: contents: write @@ -49,3 +53,16 @@ jobs: release-branch: 'release-proxy' secrets: ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }} + + create-compute-release-branch: + if: inputs.create-compute-release-branch + + permissions: + contents: write + + uses: ./.github/workflows/_create-release-pr.yml + with: + component-name: 'Compute' + release-branch: 'release-compute' + secrets: + ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }} diff --git a/.github/workflows/trigger-e2e-tests.yml b/.github/workflows/trigger-e2e-tests.yml index 1e7264c55a87..70c2e8549f3e 100644 --- a/.github/workflows/trigger-e2e-tests.yml +++ b/.github/workflows/trigger-e2e-tests.yml @@ -51,6 +51,8 @@ jobs: echo "tag=release-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT + elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then + echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT else echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'" BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId') diff --git a/CODEOWNERS b/CODEOWNERS index 21b0e7c51f0f..71b5e65f94a3 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,15 +1,29 @@ -/.github/ @neondatabase/developer-productivity -/compute_tools/ @neondatabase/control-plane @neondatabase/compute -/libs/pageserver_api/ @neondatabase/storage -/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage -/libs/remote_storage/ @neondatabase/storage -/libs/safekeeper_api/ @neondatabase/storage +# Autoscaling /libs/vm_monitor/ @neondatabase/autoscaling -/pageserver/ @neondatabase/storage + +# DevProd +/.github/ @neondatabase/developer-productivity + +# Compute /pgxn/ @neondatabase/compute -/pgxn/neon/ @neondatabase/compute @neondatabase/storage +/vendor/ @neondatabase/compute +/compute/ @neondatabase/compute +/compute_tools/ @neondatabase/compute + +# Proxy +/libs/proxy/ @neondatabase/proxy /proxy/ @neondatabase/proxy + +# Storage +/pageserver/ @neondatabase/storage /safekeeper/ @neondatabase/storage /storage_controller @neondatabase/storage /storage_scrubber @neondatabase/storage -/vendor/ @neondatabase/compute +/libs/pageserver_api/ 
@neondatabase/storage +/libs/remote_storage/ @neondatabase/storage +/libs/safekeeper_api/ @neondatabase/storage + +# Shared +/pgxn/neon/ @neondatabase/compute @neondatabase/storage +/libs/compute_api/ @neondatabase/compute @neondatabase/control-plane +/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage diff --git a/Cargo.lock b/Cargo.lock index 5ce27a7d45e7..f6e0024d874c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -84,16 +84,16 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.3.2" +version = "0.6.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", - "is-terminal", + "is_terminal_polyfill", "utf8parse", ] @@ -123,19 +123,19 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "1.0.1" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" dependencies = [ "anstyle", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "anyhow" -version = "1.0.71" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" +checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7" dependencies = [ "backtrace", ] @@ -185,7 +185,7 @@ checksum = "965c2d33e53cb6b267e148a4cb0760bc01f4904c1cd4bb4002a085bb016d1490" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", "synstructure", ] @@ -197,7 +197,7 @@ checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -256,7 +256,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -267,7 +267,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -301,7 +301,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand 2.0.0", + "fastrand 2.2.0", "hex", "http 0.2.9", "hyper 0.14.30", @@ -341,7 +341,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand 2.0.0", + "fastrand 2.2.0", "http 0.2.9", "http-body 0.4.5", "once_cell", @@ -417,7 +417,7 @@ dependencies = [ "aws-smithy-xml", "aws-types", "bytes", - "fastrand 2.0.0", + "fastrand 2.2.0", "hex", "hmac", "http 0.2.9", @@ -621,7 +621,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", - "fastrand 2.0.0", + "fastrand 2.2.0", "h2 0.3.26", "http 0.2.9", "http-body 0.4.5", @@ -969,7 +969,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -1031,9 +1031,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "bytes" -version = "1.5.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +checksum = 
"325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" dependencies = [ "serde", ] @@ -1167,45 +1167,43 @@ dependencies = [ [[package]] name = "clap" -version = "4.3.0" +version = "4.5.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93aae7a4192245f70fe75dd9157fc7b4a5bf53e88d30bd4396f7d8f9284d5acc" +checksum = "69371e34337c4c984bbe322360c2547210bf632eb2814bbe78a6e87a2935bd2b" dependencies = [ "clap_builder", "clap_derive", - "once_cell", ] [[package]] name = "clap_builder" -version = "4.3.0" +version = "4.5.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f423e341edefb78c9caba2d9c7f7687d0e72e89df3ce3394554754393ac3990" +checksum = "6e24c1b4099818523236a8ca881d2b45db98dadfb4625cf6608c12069fcbbde1" dependencies = [ "anstream", "anstyle", - "bitflags 1.3.2", "clap_lex", - "strsim", + "strsim 0.11.1", ] [[package]] name = "clap_derive" -version = "4.3.0" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "191d9573962933b4027f932c600cd252ce27a8ad5979418fe78e43c07996f27b" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" dependencies = [ - "heck 0.4.1", + "heck", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] name = "clap_lex" -version = "0.5.0" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" +checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7" [[package]] name = "colorchoice" @@ -1614,8 +1612,8 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "strsim", - "syn 2.0.52", + "strsim 0.10.0", + "syn 2.0.90", ] [[package]] @@ -1626,7 +1624,7 @@ checksum = "29a358ff9f12ec09c3e61fef9b5a9902623a695a46a917b07f269bff1445611a" dependencies = [ "darling_core", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -1749,7 +1747,7 @@ dependencies = [ "dsl_auto_type", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -1769,7 +1767,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "209c735641a413bc68c4923a9d6ad4bcb3ca306b794edaa7eb0b3228a99ffb25" dependencies = [ - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -1792,7 +1790,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -1812,10 +1810,10 @@ checksum = "0892a17df262a24294c382f0d5997571006e7a4348b4327557c4ff1cd4a8bccc" dependencies = [ "darling", "either", - "heck 0.5.0", + "heck", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -1947,7 +1945,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -1980,7 +1978,7 @@ checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -2054,9 +2052,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" [[package]] name = "ff" @@ -2234,7 +2232,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - 
"syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -2337,7 +2335,7 @@ checksum = "53010ccb100b96a67bc32c0175f0ed1426b31b655d562898e57325f81c023ac0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -2465,12 +2463,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" @@ -2888,6 +2880,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.10.5" @@ -2912,6 +2910,23 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jemalloc_pprof" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a883828bd6a4b957cd9f618886ff19e5f3ebd34e06ba0e855849e049fef32fb" +dependencies = [ + "anyhow", + "libc", + "mappings", + "once_cell", + "pprof_util", + "tempfile", + "tikv-jemalloc-ctl", + "tokio", + "tracing", +] + [[package]] name = "jobserver" version = "0.1.32" @@ -3022,9 +3037,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.150" +version = "0.2.167" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" [[package]] name = "libloading" @@ -3044,9 +3059,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "linux-raw-sys" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "linux-raw-sys" @@ -3079,6 +3094,19 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "mappings" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce9229c438fbf1c333926e2053c4c091feabbd40a1b590ec62710fea2384af9e" +dependencies = [ + "anyhow", + "libc", + "once_cell", + "pprof_util", + "tracing", +] + [[package]] name = "matchers" version = "0.1.0" @@ -3139,10 +3167,10 @@ version = "0.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e6777fc80a575f9503d908c8b498782a6c3ee88a06cb416dc3941401e43b94" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -3346,6 +3374,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" dependencies = [ + "num-bigint", "num-complex", "num-integer", "num-iter", @@ -3434,6 +3463,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" dependencies = [ "autocfg", + "num-bigint", "num-integer", "num-traits", ] @@ -3497,9 +3527,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.18.0" +version = "1.20.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "oorandom" @@ -3515,9 +3545,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "opentelemetry" -version = "0.24.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c365a63eec4f55b7efeceb724f1336f26a9cf3427b70e59e2cd2a5b947fba96" +checksum = "570074cc999d1a58184080966e5bd3bf3a9a4af650c3b05047c2621e7405cd17" dependencies = [ "futures-core", "futures-sink", @@ -3529,9 +3559,9 @@ dependencies = [ [[package]] name = "opentelemetry-http" -version = "0.13.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad31e9de44ee3538fb9d64fe3376c1362f406162434609e79aea2a41a0af78ab" +checksum = "6351496aeaa49d7c267fb480678d85d1cd30c5edb20b497c48c56f62a8c14b99" dependencies = [ "async-trait", "bytes", @@ -3542,9 +3572,9 @@ dependencies = [ [[package]] name = "opentelemetry-otlp" -version = "0.17.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b925a602ffb916fb7421276b86756027b37ee708f9dce2dbdcc51739f07e727" +checksum = "29e1f9c8b032d4f635c730c0efcf731d5e2530ea13fa8bef7939ddc8420696bd" dependencies = [ "async-trait", "futures-core", @@ -3560,9 +3590,9 @@ dependencies = [ [[package]] name = "opentelemetry-proto" -version = "0.7.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30ee9f20bff9c984511a02f082dc8ede839e4a9bf15cc2487c8d6fea5ad850d9" +checksum = "c9d3968ce3aefdcca5c27e3c4ea4391b37547726a70893aab52d3de95d5f8b34" dependencies = [ "opentelemetry", "opentelemetry_sdk", @@ -3572,15 +3602,15 @@ dependencies = [ [[package]] name = "opentelemetry-semantic-conventions" -version = "0.16.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cefe0543875379e47eb5f1e68ff83f45cc41366a92dfd0d073d513bf68e9a05" +checksum = "db945c1eaea8ac6a9677185357480d215bb6999faa9f691d0c4d4d641eab7a09" [[package]] name = "opentelemetry_sdk" -version = "0.24.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692eac490ec80f24a17828d49b40b60f5aeaccdfe6a503f939713afd22bc28df" +checksum = "d2c627d9f4c9cdc1f21a29ee4bfbd6028fcb8bcf2a857b43f3abdf72c9c862f3" dependencies = [ "async-trait", "futures-channel", @@ -3954,7 +3984,7 @@ dependencies = [ "parquet", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -4056,7 +4086,7 @@ checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -4139,7 +4169,7 @@ dependencies = [ [[package]] name = "postgres" version = "0.19.4" -source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#00940fcdb57a8e99e805297b75839e7c4c7b1796" +source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#511f998c00148ab7c847bd7e6cfd3a906d0e7473" dependencies = [ "bytes", "fallible-iterator", @@ -4152,7 +4182,7 @@ dependencies = [ [[package]] name = "postgres-protocol" version = "0.6.4" -source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#00940fcdb57a8e99e805297b75839e7c4c7b1796" +source = 
"git+https://github.com/neondatabase/rust-postgres.git?branch=neon#511f998c00148ab7c847bd7e6cfd3a906d0e7473" dependencies = [ "base64 0.20.0", "byteorder", @@ -4165,7 +4195,6 @@ dependencies = [ "rand 0.8.5", "sha2", "stringprep", - "tokio", ] [[package]] @@ -4177,7 +4206,6 @@ dependencies = [ "bytes", "fallible-iterator", "hmac", - "md-5", "memchr", "rand 0.8.5", "sha2", @@ -4188,7 +4216,7 @@ dependencies = [ [[package]] name = "postgres-types" version = "0.2.4" -source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#00940fcdb57a8e99e805297b75839e7c4c7b1796" +source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#511f998c00148ab7c847bd7e6cfd3a906d0e7473" dependencies = [ "bytes", "fallible-iterator", @@ -4298,6 +4326,19 @@ dependencies = [ "thiserror", ] +[[package]] +name = "pprof_util" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65c568b3f8c1c37886ae07459b1946249e725c315306b03be5632f84c239f781" +dependencies = [ + "anyhow", + "flate2", + "num", + "paste", + "prost", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -4334,7 +4375,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" dependencies = [ "proc-macro2", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -4348,9 +4389,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.78" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -4414,7 +4455,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" dependencies = [ "bytes", - "heck 0.5.0", + "heck", "itertools 0.12.1", "log", "multimap", @@ -4424,7 +4465,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.52", + "syn 2.0.90", "tempfile", ] @@ -4438,7 +4479,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -4567,6 +4608,7 @@ dependencies = [ "tikv-jemalloc-ctl", "tikv-jemallocator", "tokio", + "tokio-postgres", "tokio-postgres2", "tokio-rustls 0.26.0", "tokio-tungstenite", @@ -4992,9 +5034,9 @@ dependencies = [ [[package]] name = "reqwest-middleware" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0209efb52486ad88136190094ee214759ef7507068b27992256ed6610eb71a01" +checksum = "d1ccd3b55e711f91a9885a2fa6fbbb2e39db1776420b062efc058c6410f7e5e3" dependencies = [ "anyhow", "async-trait", @@ -5007,13 +5049,12 @@ dependencies = [ [[package]] name = "reqwest-retry" -version = "0.5.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40f342894422862af74c50e1e9601cf0931accc9c6981e5eb413c46603b616b5" +checksum = "29c73e4195a6bfbcb174b790d9b3407ab90646976c55de58a6515da25d851178" dependencies = [ "anyhow", "async-trait", - "chrono", "futures", "getrandom 0.2.11", "http 1.1.0", @@ -5022,6 +5063,7 @@ dependencies = [ "reqwest 0.12.4", "reqwest-middleware", "retry-policies", + "thiserror", "tokio", "tracing", "wasm-timer", @@ -5029,9 +5071,9 @@ dependencies = [ [[package]] name = "reqwest-tracing" -version = "0.5.3" +version = "0.5.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfdd9bfa64c72233d8dd99ab7883efcdefe9e16d46488ecb9228b71a2e2ceb45" +checksum = "ff82cf5730a1311fb9413b0bc2b8e743e0157cd73f010ab4ec374a923873b6a2" dependencies = [ "anyhow", "async-trait", @@ -5047,12 +5089,10 @@ dependencies = [ [[package]] name = "retry-policies" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "493b4243e32d6eedd29f9a398896e35c6943a123b55eec97dcaee98310d25810" +checksum = "5875471e6cab2871bc150ecb8c727db5113c9338cc3354dc5ee3425b6aa40a1c" dependencies = [ - "anyhow", - "chrono", "rand 0.8.5", ] @@ -5176,7 +5216,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.52", + "syn 2.0.90", "unicode-ident", ] @@ -5222,14 +5262,14 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.28" +version = "0.38.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" dependencies = [ "bitflags 2.4.1", "errno", "libc", - "linux-raw-sys 0.4.13", + "linux-raw-sys 0.4.14", "windows-sys 0.52.0", ] @@ -5684,7 +5724,7 @@ checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -5766,7 +5806,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -6123,6 +6163,12 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "strum" version = "0.26.3" @@ -6135,11 +6181,11 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "rustversion", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -6190,9 +6236,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.52" +version = "2.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" dependencies = [ "proc-macro2", "quote", @@ -6222,7 +6268,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -6253,13 +6299,13 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.9.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", - "fastrand 2.0.0", - "redox_syscall 0.4.1", + "fastrand 2.2.0", + "once_cell", "rustix", "windows-sys 0.52.0", ] @@ -6300,27 +6346,27 @@ checksum = "78ea17a2dc368aeca6f554343ced1b1e31f76d63683fa8016e5844bd7a5144a1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] name = "thiserror" -version = "1.0.57" 
+version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.57" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -6494,13 +6540,13 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] name = "tokio-postgres" version = "0.7.7" -source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#00940fcdb57a8e99e805297b75839e7c4c7b1796" +source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#511f998c00148ab7c847bd7e6cfd3a906d0e7473" dependencies = [ "async-trait", "byteorder", @@ -6719,7 +6765,7 @@ dependencies = [ "prost-build", "prost-types", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -6756,9 +6802,9 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "log", "pin-project-lite", @@ -6779,20 +6825,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", "valuable", @@ -6821,9 +6867,9 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.25.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9784ed4da7d921bc8df6963f8c80a0e4ce34ba6ba76668acadd3edbd985ff3b" +checksum = "dc58af5d3f6c5811462cabb3289aec0093f7338e367e5a33d28c0433b3c7360b" dependencies = [ "js-sys", "once_cell", @@ -6839,9 +6885,9 @@ dependencies = [ [[package]] name = "tracing-serde" -version = "0.1.3" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" dependencies = [ "serde", "tracing-core", @@ -6849,9 +6895,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.18" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +checksum = 
"e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "matchers", "once_cell", @@ -7060,6 +7106,7 @@ dependencies = [ "hex-literal", "humantime", "hyper 0.14.30", + "jemalloc_pprof", "jsonwebtoken", "metrics", "nix 0.27.1", @@ -7258,7 +7305,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", "wasm-bindgen-shared", ] @@ -7292,7 +7339,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -7646,8 +7693,12 @@ dependencies = [ "memchr", "nix 0.26.4", "nom", + "num", "num-bigint", + "num-complex", "num-integer", + "num-iter", + "num-rational", "num-traits", "once_cell", "parquet", @@ -7669,8 +7720,9 @@ dependencies = [ "smallvec", "spki 0.7.3", "subtle", - "syn 2.0.52", + "syn 2.0.90", "sync_wrapper 0.1.2", + "tikv-jemalloc-ctl", "tikv-jemalloc-sys", "time", "time-macros", @@ -7769,7 +7821,7 @@ checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] @@ -7790,7 +7842,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.90", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 64c384f17a4b..a35823e0c2c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,7 +74,7 @@ bindgen = "0.70" bit_field = "0.10.2" bstr = "1.0" byteorder = "1.4" -bytes = "1.0" +bytes = "1.9" camino = "1.1.6" cfg-if = "1.0.0" chrono = { version = "0.4", default-features = false, features = ["clock"] } @@ -115,6 +115,7 @@ indoc = "2" ipnet = "2.10.0" itertools = "0.10" itoa = "1.0.11" +jemalloc_pprof = "0.6" jsonwebtoken = "9" lasso = "0.7" libc = "0.2" @@ -127,10 +128,10 @@ notify = "6.0.0" num_cpus = "1.15" num-traits = "0.2.15" once_cell = "1.13" -opentelemetry = "0.24" -opentelemetry_sdk = "0.24" -opentelemetry-otlp = { version = "0.17", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] } -opentelemetry-semantic-conventions = "0.16" +opentelemetry = "0.26" +opentelemetry_sdk = "0.26" +opentelemetry-otlp = { version = "0.26", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] } +opentelemetry-semantic-conventions = "0.26" parking_lot = "0.12" parquet = { version = "53", default-features = false, features = ["zstd"] } parquet_derive = "53" @@ -144,9 +145,9 @@ rand = "0.8" redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] } regex = "1.10.2" reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } -reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_24"] } -reqwest-middleware = "0.3.0" -reqwest-retry = "0.5" +reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_26"] } +reqwest-middleware = "0.4" +reqwest-retry = "0.7" routerify = "3" rpds = "0.13" rustc-hash = "1.1.0" @@ -175,7 +176,7 @@ sync_wrapper = "0.1.2" tar = "0.4" test-context = "0.3" thiserror = "1.0" -tikv-jemallocator = { version = "0.6", features = ["stats"] } +tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] } tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] } tokio = { version = "1.17", features = ["macros"] } tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" } @@ 
-191,7 +192,7 @@ tonic = {version = "0.12.3", features = ["tls", "tls-roots"]} tower-service = "0.3.2" tracing = "0.1" tracing-error = "0.2" -tracing-opentelemetry = "0.25" +tracing-opentelemetry = "0.27" tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] } try-lock = "0.2.5" twox-hash = { version = "1.6.3", default-features = false } diff --git a/build-tools.Dockerfile b/build-tools.Dockerfile index 2671702697b7..fa84e467ad61 100644 --- a/build-tools.Dockerfile +++ b/build-tools.Dockerfile @@ -115,7 +115,7 @@ RUN set -e \ # Keep the version the same as in compute/compute-node.Dockerfile and # test_runner/regress/test_compute_metrics.py. -ENV SQL_EXPORTER_VERSION=0.13.1 +ENV SQL_EXPORTER_VERSION=0.16.0 RUN curl -fsSL \ "https://github.com/burningalchemist/sql_exporter/releases/download/${SQL_EXPORTER_VERSION}/sql_exporter-${SQL_EXPORTER_VERSION}.linux-$(case "$(uname -m)" in x86_64) echo amd64;; aarch64) echo arm64;; esac).tar.gz" \ --output sql_exporter.tar.gz \ diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 956701805393..33d2a1028521 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -358,10 +358,10 @@ COPY compute/patches/pgvector.patch /pgvector.patch # because we build the images on different machines than where we run them. # Pass OPTFLAGS="" to remove it. # -# vector 0.7.4 supports v17 -# last release v0.7.4 - Aug 5, 2024 -RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.4.tar.gz -O pgvector.tar.gz && \ - echo "0341edf89b1924ae0d552f617e14fb7f8867c0194ed775bcc44fa40288642583 pgvector.tar.gz" | sha256sum --check && \ +# vector >0.7.4 supports v17 +# last release v0.8.0 - Oct 30, 2024 +RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.8.0.tar.gz -O pgvector.tar.gz && \ + echo "867a2c328d4928a5a9d6f052cd3bc78c7d60228a9b914ad32aa3db88e9de27b0 pgvector.tar.gz" | sha256sum --check && \ mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \ patch -p1 < /pgvector.patch && \ make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -1324,7 +1324,7 @@ FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter # Keep the version the same as in build-tools.Dockerfile and # test_runner/regress/test_compute_metrics.py. -FROM burningalchemist/sql_exporter:0.13.1 AS sql-exporter +FROM burningalchemist/sql_exporter:0.16.0 AS sql-exporter ######################################################################################### # @@ -1367,15 +1367,12 @@ RUN make PG_VERSION="${PG_VERSION}" -C compute FROM neon-pg-ext-build AS neon-pg-ext-test ARG PG_VERSION -RUN case "${PG_VERSION}" in "v17") \ - echo "v17 extensions are not supported yet. 
Quit" && exit 0;; \ - esac && \ - mkdir /ext-src +RUN mkdir /ext-src #COPY --from=postgis-build /postgis.tar.gz /ext-src/ #COPY --from=postgis-build /sfcgal/* /usr COPY --from=plv8-build /plv8.tar.gz /ext-src/ -COPY --from=h3-pg-build /h3-pg.tar.gz /ext-src/ +#COPY --from=h3-pg-build /h3-pg.tar.gz /ext-src/ COPY --from=unit-pg-build /postgresql-unit.tar.gz /ext-src/ COPY --from=vector-pg-build /pgvector.tar.gz /ext-src/ COPY --from=vector-pg-build /pgvector.patch /ext-src/ @@ -1395,7 +1392,7 @@ COPY --from=hll-pg-build /hll.tar.gz /ext-src COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src #COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src -COPY compute/patches/pg_hint_plan.patch /ext-src +COPY compute/patches/pg_hint_plan_${PG_VERSION}.patch /ext-src COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src COPY compute/patches/pg_cron.patch /ext-src #COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src @@ -1405,38 +1402,23 @@ COPY --from=pg-roaringbitmap-pg-build /pg_roaringbitmap.tar.gz /ext-src COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src #COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src #COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src -COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src +#pg_anon is not supported yet for pg v17 so, don't fail if nothing found +COPY --from=pg-anon-pg-build /pg_anon.tar.g? /ext-src COPY compute/patches/pg_anon.patch /ext-src COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src -RUN case "${PG_VERSION}" in "v17") \ - echo "v17 extensions are not supported yet. Quit" && exit 0;; \ - esac && \ - cd /ext-src/ && for f in *.tar.gz; \ +RUN cd /ext-src/ && for f in *.tar.gz; \ do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \ rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \ || exit 1; rm -f $f; done -RUN case "${PG_VERSION}" in "v17") \ - echo "v17 extensions are not supported yet. Quit" && exit 0;; \ - esac && \ - cd /ext-src/rum-src && patch -p1 <../rum.patch -RUN case "${PG_VERSION}" in "v17") \ - echo "v17 extensions are not supported yet. Quit" && exit 0;; \ - esac && \ - cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch -RUN case "${PG_VERSION}" in "v17") \ - echo "v17 extensions are not supported yet. Quit" && exit 0;; \ - esac && \ - cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch +RUN cd /ext-src/rum-src && patch -p1 <../rum.patch +RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch +RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan_${PG_VERSION}.patch COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh RUN case "${PG_VERSION}" in "v17") \ - echo "v17 extensions are not supported yet. 
Quit" && exit 0;; \ - esac && \ - patch -p1 true)).size), 0) + FROM (SELECT * FROM pg_ls_dir('pg_logical/snapshots') WHERE pg_ls_dir LIKE '%.snap') AS name + ) AS logical_snapshots_bytes; diff --git a/compute/patches/pg_hint_plan.patch b/compute/patches/pg_hint_plan_v16.patch similarity index 100% rename from compute/patches/pg_hint_plan.patch rename to compute/patches/pg_hint_plan_v16.patch diff --git a/compute/patches/pg_hint_plan_v17.patch b/compute/patches/pg_hint_plan_v17.patch new file mode 100644 index 000000000000..dbf4e470ea98 --- /dev/null +++ b/compute/patches/pg_hint_plan_v17.patch @@ -0,0 +1,174 @@ +diff --git a/expected/ut-A.out b/expected/ut-A.out +index e7d68a1..65a056c 100644 +--- a/expected/ut-A.out ++++ b/expected/ut-A.out +@@ -9,13 +9,16 @@ SET search_path TO public; + ---- + -- No.A-1-1-3 + CREATE EXTENSION pg_hint_plan; ++LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan + -- No.A-1-2-3 + DROP EXTENSION pg_hint_plan; + -- No.A-1-1-4 + CREATE SCHEMA other_schema; + CREATE EXTENSION pg_hint_plan SCHEMA other_schema; ++LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan + ERROR: extension "pg_hint_plan" must be installed in schema "hint_plan" + CREATE EXTENSION pg_hint_plan; ++LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan + DROP SCHEMA other_schema; + ---- + ---- No. A-5-1 comment pattern +diff --git a/expected/ut-J.out b/expected/ut-J.out +index 2fa3c70..314e929 100644 +--- a/expected/ut-J.out ++++ b/expected/ut-J.out +@@ -789,38 +789,6 @@ NestLoop(st1 st2) + MergeJoin(t1 t2) + not used hint: + duplication hint: +-error hint: +- +-LOG: pg_hint_plan: +-used hint: +-not used hint: +-NestLoop(st1 st2) +-MergeJoin(t1 t2) +-duplication hint: +-error hint: +- +-LOG: pg_hint_plan: +-used hint: +-not used hint: +-NestLoop(st1 st2) +-MergeJoin(t1 t2) +-duplication hint: +-error hint: +- +-LOG: pg_hint_plan: +-used hint: +-not used hint: +-NestLoop(st1 st2) +-MergeJoin(t1 t2) +-duplication hint: +-error hint: +- +-LOG: pg_hint_plan: +-used hint: +-not used hint: +-NestLoop(st1 st2) +-MergeJoin(t1 t2) +-duplication hint: + error hint: + + explain_filter +diff --git a/expected/ut-S.out b/expected/ut-S.out +index 0bfcfb8..e75f581 100644 +--- a/expected/ut-S.out ++++ b/expected/ut-S.out +@@ -4415,34 +4415,6 @@ used hint: + IndexScan(ti1 ti1_pred) + not used hint: + duplication hint: +-error hint: +- +-LOG: pg_hint_plan: +-used hint: +-not used hint: +-IndexScan(ti1 ti1_pred) +-duplication hint: +-error hint: +- +-LOG: pg_hint_plan: +-used hint: +-not used hint: +-IndexScan(ti1 ti1_pred) +-duplication hint: +-error hint: +- +-LOG: pg_hint_plan: +-used hint: +-not used hint: +-IndexScan(ti1 ti1_pred) +-duplication hint: +-error hint: +- +-LOG: pg_hint_plan: +-used hint: +-not used hint: +-IndexScan(ti1 ti1_pred) +-duplication hint: + error hint: + + explain_filter +diff --git a/expected/ut-W.out b/expected/ut-W.out +index a09bd34..0ad227c 100644 +--- a/expected/ut-W.out ++++ b/expected/ut-W.out +@@ -1341,54 +1341,6 @@ IndexScan(ft1) + IndexScan(t) + Parallel(s1 3 hard) + duplication hint: +-error hint: +- +-LOG: pg_hint_plan: +-used hint: +-not used hint: +-IndexScan(*VALUES*) +-SeqScan(cte1) +-IndexScan(ft1) +-IndexScan(t) +-Parallel(p1 5 hard) +-Parallel(s1 3 hard) +-duplication hint: +-error hint: +- +-LOG: pg_hint_plan: +-used hint: +-not used hint: +-IndexScan(*VALUES*) +-SeqScan(cte1) +-IndexScan(ft1) +-IndexScan(t) +-Parallel(p1 5 hard) +-Parallel(s1 3 hard) 
+-duplication hint: +-error hint: +- +-LOG: pg_hint_plan: +-used hint: +-not used hint: +-IndexScan(*VALUES*) +-SeqScan(cte1) +-IndexScan(ft1) +-IndexScan(t) +-Parallel(p1 5 hard) +-Parallel(s1 3 hard) +-duplication hint: +-error hint: +- +-LOG: pg_hint_plan: +-used hint: +-not used hint: +-IndexScan(*VALUES*) +-SeqScan(cte1) +-IndexScan(ft1) +-IndexScan(t) +-Parallel(p1 5 hard) +-Parallel(s1 3 hard) +-duplication hint: + error hint: + + explain_filter +diff --git a/expected/ut-fdw.out b/expected/ut-fdw.out +index 017fa4b..98d989b 100644 +--- a/expected/ut-fdw.out ++++ b/expected/ut-fdw.out +@@ -7,6 +7,7 @@ SET pg_hint_plan.debug_print TO on; + SET client_min_messages TO LOG; + SET pg_hint_plan.enable_hint TO on; + CREATE EXTENSION file_fdw; ++LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/file_fdw + CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw; + CREATE USER MAPPING FOR PUBLIC SERVER file_server; + CREATE FOREIGN TABLE ft1 (id int, val int) SERVER file_server OPTIONS (format 'csv', filename :'filename'); diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index b178d7abd6d6..e73ccd908e3e 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -335,6 +335,7 @@ fn wait_spec( pgdata: pgdata.to_string(), pgbin: pgbin.to_string(), pgversion: get_pg_version_string(pgbin), + http_port, live_config_allowed, state: Mutex::new(new_state), state_changed: Condvar::new(), @@ -389,7 +390,6 @@ fn wait_spec( Ok(WaitSpecResult { compute, - http_port, resize_swap_on_bind, set_disk_quota_for_fs: set_disk_quota_for_fs.cloned(), }) @@ -397,8 +397,6 @@ fn wait_spec( struct WaitSpecResult { compute: Arc, - // passed through from ProcessCliResult - http_port: u16, resize_swap_on_bind: bool, set_disk_quota_for_fs: Option, } @@ -408,7 +406,6 @@ fn start_postgres( #[allow(unused_variables)] matches: &clap::ArgMatches, WaitSpecResult { compute, - http_port, resize_swap_on_bind, set_disk_quota_for_fs, }: WaitSpecResult, @@ -481,12 +478,10 @@ fn start_postgres( } } - let extension_server_port: u16 = http_port; - // Start Postgres let mut pg = None; if !prestartup_failed { - pg = match compute.start_compute(extension_server_port) { + pg = match compute.start_compute() { Ok(pg) => Some(pg), Err(err) => { error!("could not start the compute node: {:#}", err); diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index da1caf1a9b2f..d72a04f2f979 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -79,6 +79,8 @@ pub struct ComputeNode { /// - we push spec and it does configuration /// - but then it is restarted without any spec again pub live_config_allowed: bool, + /// The port that the compute's HTTP server listens on + pub http_port: u16, /// Volatile part of the `ComputeNode`, which should be used under `Mutex`. /// To allow HTTP API server to serving status requests, while configuration /// is in progress, lock should be held only for short periods of time to do @@ -611,11 +613,7 @@ impl ComputeNode { /// Do all the preparations like PGDATA directory creation, configuration, /// safekeepers sync, basebackup, etc. 
#[instrument(skip_all)] - pub fn prepare_pgdata( - &self, - compute_state: &ComputeState, - extension_server_port: u16, - ) -> Result<()> { + pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> { let pspec = compute_state.pspec.as_ref().expect("spec must be set"); let spec = &pspec.spec; let pgdata_path = Path::new(&self.pgdata); @@ -625,7 +623,7 @@ impl ComputeNode { config::write_postgres_conf( &pgdata_path.join("postgresql.conf"), &pspec.spec, - Some(extension_server_port), + self.http_port, )?; // Syncing safekeepers is only safe with primary nodes: if a primary @@ -1243,14 +1241,9 @@ impl ComputeNode { // Write new config let pgdata_path = Path::new(&self.pgdata); let postgresql_conf_path = pgdata_path.join("postgresql.conf"); - config::write_postgres_conf(&postgresql_conf_path, &spec, None)?; + config::write_postgres_conf(&postgresql_conf_path, &spec, self.http_port)?; - // TODO(ololobus): We need a concurrency during reconfiguration as well, - // but DB is already running and used by user. We can easily get out of - // `max_connections` limit, and the current code won't handle that. - // let compute_state = self.state.lock().unwrap().clone(); - // let max_concurrent_connections = self.max_service_connections(&compute_state, &spec); - let max_concurrent_connections = 1; + let max_concurrent_connections = spec.reconfigure_concurrency; // Temporarily reset max_cluster_size in config // to avoid the possibility of hitting the limit, while we are reconfiguring: @@ -1284,10 +1277,7 @@ impl ComputeNode { } #[instrument(skip_all)] - pub fn start_compute( - &self, - extension_server_port: u16, - ) -> Result<(std::process::Child, std::thread::JoinHandle<()>)> { + pub fn start_compute(&self) -> Result<(std::process::Child, std::thread::JoinHandle<()>)> { let compute_state = self.state.lock().unwrap().clone(); let pspec = compute_state.pspec.as_ref().expect("spec must be set"); info!( @@ -1362,7 +1352,7 @@ impl ComputeNode { info!("{:?}", remote_ext_metrics); } - self.prepare_pgdata(&compute_state, extension_server_port)?; + self.prepare_pgdata(&compute_state)?; let start_time = Utc::now(); let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?; diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs index d65fe7319401..b257c8a68f81 100644 --- a/compute_tools/src/config.rs +++ b/compute_tools/src/config.rs @@ -37,7 +37,7 @@ pub fn line_in_file(path: &Path, line: &str) -> Result { pub fn write_postgres_conf( path: &Path, spec: &ComputeSpec, - extension_server_port: Option, + extension_server_port: u16, ) -> Result<()> { // File::create() destroys the file content if it exists. let mut file = File::create(path)?; @@ -127,9 +127,7 @@ pub fn write_postgres_conf( writeln!(file, "# Managed by compute_ctl: end")?; } - if let Some(port) = extension_server_port { - writeln!(file, "neon.extension_server_port={}", port)?; - } + writeln!(file, "neon.extension_server_port={}", extension_server_port)?; // This is essential to keep this line at the end of the file, // because it is intended to override any settings above. 
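Reviewer note on the compute_tools changes above: the extension-server port moves from a per-call `Option<u16>` argument into `ComputeNode` state (`http_port`), so live reconfiguration now writes `neon.extension_server_port` instead of passing `None` and dropping it from the regenerated `postgresql.conf`. Below is a minimal, self-contained Rust sketch of that shape; the types and function bodies are simplified stand-ins, not the actual compute_tools API.

```rust
// Sketch only: simplified stand-ins for ComputeNode / write_postgres_conf.
// The point is the plumbing: the port is stored once on the node, and both
// the start-up and reconfigure paths write the same setting.
use std::fmt::Write as _;

struct ComputeNode {
    /// The port that the compute's HTTP server listens on (stored once).
    http_port: u16,
}

fn write_postgres_conf(conf: &mut String, extension_server_port: u16) -> std::fmt::Result {
    // Previously this parameter was Option<u16>; reconfigure passed None,
    // so the setting vanished from the rewritten config file.
    writeln!(conf, "neon.extension_server_port={}", extension_server_port)
}

impl ComputeNode {
    fn prepare_pgdata(&self) -> String {
        let mut conf = String::new();
        write_postgres_conf(&mut conf, self.http_port).unwrap();
        conf
    }

    fn reconfigure(&self) -> String {
        // Same call shape as start-up: no Option, no lost setting.
        let mut conf = String::new();
        write_postgres_conf(&mut conf, self.http_port).unwrap();
        conf
    }
}

fn main() {
    let node = ComputeNode { http_port: 3080 };
    // Both paths now emit identical port configuration by construction.
    assert_eq!(node.prepare_pgdata(), node.reconfigure());
    println!("{}", node.prepare_pgdata());
}
```

Keeping the port on the struct means `start_compute()` and `reconfigure()` cannot disagree about it, which is why the diff also deletes the `http_port` pass-through in `WaitSpecResult`.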
diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs
index 71514daa7cc5..35067c95b6d0 100644
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -53,6 +53,7 @@ use compute_api::spec::Role;
 use nix::sys::signal::kill;
 use nix::sys::signal::Signal;
 use pageserver_api::shard::ShardStripeSize;
+use reqwest::header::CONTENT_TYPE;
 use serde::{Deserialize, Serialize};
 use url::Host;
 use utils::id::{NodeId, TenantId, TimelineId};
@@ -310,6 +311,10 @@ impl Endpoint {
         conf.append("wal_log_hints", "off");
         conf.append("max_replication_slots", "10");
         conf.append("hot_standby", "on");
+        // Set to 1MB to both exercise getPage requests/LFC, and still have enough room for
+        // Postgres to operate. Anything smaller might not be enough for Postgres under load,
+        // and can cause errors like 'no unpinned buffers available', see
+        //
         conf.append("shared_buffers", "1MB");
         conf.append("fsync", "off");
         conf.append("max_connections", "100");
@@ -614,6 +619,7 @@ impl Endpoint {
             pgbouncer_settings: None,
             shard_stripe_size: Some(shard_stripe_size),
             local_proxy_config: None,
+            reconfigure_concurrency: 1,
         };
         let spec_path = self.endpoint_path().join("spec.json");
         std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
@@ -813,6 +819,7 @@ impl Endpoint {
                 self.http_address.ip(),
                 self.http_address.port()
             ))
+            .header(CONTENT_TYPE.as_str(), "application/json")
             .body(format!(
                 "{{\"spec\":{}}}",
                 serde_json::to_string_pretty(&spec)?
diff --git a/control_plane/src/safekeeper.rs b/control_plane/src/safekeeper.rs
index 7a019bce886a..f0c37229254c 100644
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -5,6 +5,7 @@
 //! ```text
 //!   .neon/safekeepers/
 //! ```
+use std::error::Error as _;
 use std::future::Future;
 use std::io::Write;
 use std::path::PathBuf;
@@ -26,7 +27,7 @@ use crate::{

 #[derive(Error, Debug)]
 pub enum SafekeeperHttpError {
-    #[error("Reqwest error: {0}")]
+    #[error("request error: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())]
     Transport(#[from] reqwest::Error),

     #[error("Error: {0}")]
diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs
index b7f38c628660..e87942453260 100644
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -560,14 +560,26 @@ async fn main() -> anyhow::Result<()> {
                 .await?;
         }
         Command::TenantDescribe { tenant_id } => {
-            let describe_response = storcon_client
+            let TenantDescribeResponse {
+                tenant_id,
+                shards,
+                stripe_size,
+                policy,
+                config,
+            } = storcon_client
                 .dispatch::<(), TenantDescribeResponse>(
                     Method::GET,
                     format!("control/v1/tenant/{tenant_id}"),
                     None,
                 )
                 .await?;
-            let shards = describe_response.shards;
+            println!("Tenant {tenant_id}");
+            let mut table = comfy_table::Table::new();
+            table.add_row(["Policy", &format!("{:?}", policy)]);
+            table.add_row(["Stripe size", &format!("{:?}", stripe_size)]);
+            table.add_row(["Config", &serde_json::to_string_pretty(&config).unwrap()]);
+            println!("{table}");
+            println!("Shards:");
             let mut table = comfy_table::Table::new();
             table.set_header(["Shard", "Attached", "Secondary", "Last error", "status"]);
             for shard in shards {
diff --git a/docker-compose/compute_wrapper/Dockerfile b/docker-compose/compute_wrapper/Dockerfile
index 8378f37b484c..05a2cf124cad 100644
--- a/docker-compose/compute_wrapper/Dockerfile
+++ b/docker-compose/compute_wrapper/Dockerfile
@@ -4,14 +4,16 @@ ARG TAG=latest

 FROM $REPOSITORY/${COMPUTE_IMAGE}:$TAG

+ARG COMPUTE_IMAGE
+
 USER root
 RUN apt-get update && \
     apt-get install -y curl \
     jq \
     python3-pip \
-    netcat
+    netcat-openbsd
 #Faker is required for the pg_anon test
-RUN pip3 install Faker
+RUN case $COMPUTE_IMAGE in compute-node-v17) OPT="--break-system-packages";; *) OPT= ;; esac && pip3 install $OPT Faker
 #This is required for the pg_hintplan test
 RUN mkdir -p /ext-src/pg_hint_plan-src && chown postgres /ext-src/pg_hint_plan-src
diff --git a/docker-compose/docker_compose_test.sh b/docker-compose/docker_compose_test.sh
index 10805a99522c..c97dfaa901e8 100755
--- a/docker-compose/docker_compose_test.sh
+++ b/docker-compose/docker_compose_test.sh
@@ -30,10 +30,17 @@ cleanup() {
     docker compose --profile test-extensions -f $COMPOSE_FILE down
 }

-for pg_version in 14 15 16; do
+for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
+    pg_version=${pg_version/v/}
     echo "clean up containers if exists"
     cleanup
-    PG_TEST_VERSION=$(($pg_version < 16 ? 16 : $pg_version))
+    PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
+    # Support for pg_anon has not yet been added for PG17, so we have to remove the corresponding option
+    if [ $pg_version -eq 17 ]; then
+      SPEC_PATH="compute_wrapper/var/db/postgres/specs"
+      mv $SPEC_PATH/spec.json $SPEC_PATH/spec.bak
+      jq 'del(.cluster.settings[] | select (.name == "session_preload_libraries"))' $SPEC_PATH/spec.bak > $SPEC_PATH/spec.json
+    fi
     PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d

     echo "wait until the compute is ready. timeout after 60s. "
@@ -54,8 +61,7 @@ for pg_version in 14 15 16; do
         fi
     done

-    if [ $pg_version -ge 16 ]
-    then
+    if [ $pg_version -ge 16 ]; then
         echo Enabling trust connection
         docker exec $COMPUTE_CONTAINER_NAME bash -c "sed -i '\$d' /var/db/postgres/compute/pg_hba.conf && echo -e 'host\t all\t all\t all\t trust' >> /var/db/postgres/compute/pg_hba.conf && psql $PSQL_OPTION -c 'select pg_reload_conf()' "
         echo Adding postgres role
@@ -68,10 +74,13 @@ for pg_version in 14 15 16; do
         # The test assumes that it is running on the same host with the postgres engine.
         # In our case it's not true, that's why we are copying files to the compute node
         TMPDIR=$(mktemp -d)
-        docker cp $TEST_CONTAINER_NAME:/ext-src/pg_anon-src/data $TMPDIR/data
-        echo -e '1\t too \t many \t tabs' > $TMPDIR/data/bad.csv
-        docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/tmp/tmp_anon_alternate_data
+        # pg_anon is not supported on PG17 yet, so only prepare its test data for the other versions
+        if [ $pg_version -ne 17 ]; then
+          docker cp $TEST_CONTAINER_NAME:/ext-src/pg_anon-src/data $TMPDIR/data
+          echo -e '1\t too \t many \t tabs' > $TMPDIR/data/bad.csv
+          docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/tmp/tmp_anon_alternate_data
           rm -rf $TMPDIR
+        fi
         TMPDIR=$(mktemp -d)
         # The following block does the same for the pg_hintplan test
         docker cp $TEST_CONTAINER_NAME:/ext-src/pg_hint_plan-src/data $TMPDIR/data
@@ -97,4 +106,8 @@ for pg_version in 14 15 16; do
             fi
         fi
     cleanup
+    # pg_anon is not supported on PG17 yet; restore the original spec file we modified above
+    if [ $pg_version -eq 17 ]; then
+      mv $SPEC_PATH/spec.bak $SPEC_PATH/spec.json
+    fi
 done
diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs
index 8a447563dcf2..6d9c353cda1b 100644
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -19,6 +19,10 @@ pub type PgIdent = String;
 /// String type alias representing Postgres extension version
 pub type ExtVersion = String;

+fn default_reconfigure_concurrency() -> usize {
+    1
+}
+
 /// Cluster spec or configuration represented as an optional number of
 /// delta operations + final cluster state description.
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
@@ -67,7 +71,7 @@ pub struct ComputeSpec {
     pub cluster: Cluster,
     pub delta_operations: Option<Vec<DeltaOp>>,

-    /// An optinal hint that can be passed to speed up startup time if we know
+    /// An optional hint that can be passed to speed up startup time if we know
     /// that no pg catalog mutations (like role creation, database creation,
     /// extension creation) need to be done on the actual database to start.
     #[serde(default)] // Default false
@@ -86,9 +90,7 @@ pub struct ComputeSpec {
     // etc. GUCs in cluster.settings. TODO: Once the control plane has been
    // updated to fill these fields, we can make these non optional.
     pub tenant_id: Option<TenantId>,
-
     pub timeline_id: Option<TimelineId>,
-
     pub pageserver_connstring: Option<String>,

     #[serde(default)]
@@ -113,6 +115,20 @@ pub struct ComputeSpec {
     /// Local Proxy configuration used for JWT authentication
     #[serde(default)]
     pub local_proxy_config: Option<LocalProxySpec>,
+
+    /// Number of concurrent connections during the parallel RunInEachDatabase
+    /// phase of the apply config process.
+    ///
+    /// We may need higher concurrency during reconfiguration in case of many DBs,
+    /// but the instance is already running and used by clients. We can easily
+    /// exceed the `max_connections` limit, and the current code won't handle that.
+    ///
+    /// The default is 1, but we also allow the control plane to override this value
+    /// for specific projects. It's also recommended to bump `superuser_reserved_connections` +=
+    /// `reconfigure_concurrency` for such projects, to ensure that we always have
+    /// enough spare connections for the reconfiguration process to succeed.
+    #[serde(default = "default_reconfigure_concurrency")]
+    pub reconfigure_concurrency: usize,
 }

 /// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
@@ -315,6 +331,9 @@ mod tests {

         // Features list defaults to empty vector.
         assert!(spec.features.is_empty());
+
+        // Reconfigure concurrency defaults to 1.
+        assert_eq!(spec.reconfigure_concurrency, 1);
     }

     #[test]
diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs
index e49d15ba87a0..09cfbc55fd1c 100644
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -442,7 +442,14 @@ impl Default for ConfigToml {
             tenant_config: TenantConfigToml::default(),
             no_sync: None,
             wal_receiver_protocol: DEFAULT_WAL_RECEIVER_PROTOCOL,
-            page_service_pipelining: PageServicePipeliningConfig::Serial,
+            page_service_pipelining: if !cfg!(test) {
+                PageServicePipeliningConfig::Serial
+            } else {
+                PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
+                    max_batch_size: NonZeroUsize::new(32).unwrap(),
+                    execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
+                })
+            },
         }
     }
 }
diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs
index 0ea30ce54f78..6839ef69f592 100644
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -48,7 +48,7 @@ pub struct TenantCreateResponse {
     pub shards: Vec<TenantCreateResponseShard>,
 }

-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct NodeRegisterRequest {
     pub node_id: NodeId,

@@ -75,7 +75,7 @@ pub struct TenantPolicyRequest {
     pub scheduling: Option<ShardSchedulingPolicy>,
 }

-#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
+#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Debug)]
 pub struct AvailabilityZone(pub String);

 impl Display for AvailabilityZone {
@@ -245,6 +245,17 @@ impl From<NodeAvailability> for NodeAvailabilityWrapper {
     }
 }

+/// Scheduling policy enables us to selectively disable some automatic actions that the
+/// controller performs on a tenant shard. This is only set to a non-default value by
+/// human intervention, and it is reset to the default value (Active) when the tenant's
+/// placement policy is modified away from Attached.
+///
+/// The typical use of a non-Active scheduling policy is one of:
+/// - Pinning a shard to a node (i.e. migrating it there & setting a non-Active scheduling policy)
+/// - Working around a bug (e.g. if something is flapping and we need to stop it until the bug is fixed)
+///
+/// If you're not sure which policy to use to pin a shard to its current location, you probably
+/// want Pause.
 #[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
 pub enum ShardSchedulingPolicy {
     // Normal mode: the tenant's scheduled locations may be updated at will, including
diff --git a/libs/pageserver_api/src/key.rs b/libs/pageserver_api/src/key.rs
index 523d1433818b..37dff6fe4647 100644
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -770,6 +770,11 @@ impl Key {
             && self.field6 == 1
     }

+    #[inline(always)]
+    pub fn is_aux_file_key(&self) -> bool {
+        self.field1 == AUX_KEY_PREFIX
+    }
+
     /// Guaranteed to return `Ok()` if [`Self::is_rel_block_key`] returns `true` for `key`.
     #[inline(always)]
     pub fn to_rel_block(self) -> anyhow::Result<(RelTag, BlockNumber)> {
diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs
index 42c5d10c053b..5488f7b2c29b 100644
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -501,7 +501,9 @@ pub struct EvictionPolicyLayerAccessThreshold {

 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
 pub struct ThrottleConfig {
-    pub task_kinds: Vec<String>, // TaskKind
+    /// See [`ThrottleConfigTaskKinds`] for why we do the serde `rename`.
+    #[serde(rename = "task_kinds")]
+    pub enabled: ThrottleConfigTaskKinds,
     pub initial: u32,
     #[serde(with = "humantime_serde")]
     pub refill_interval: Duration,
@@ -509,10 +511,38 @@ pub struct ThrottleConfig {
     pub max: u32,
 }

+/// Before
+/// the throttle was applied per `Timeline::get`/`Timeline::get_vectored` call.
+/// The `task_kinds` field controlled which Pageserver "Task Kind"s
+/// were subject to the throttle.
+///
+/// After that PR, the throttle is applied at pagestream request level
+/// and the `task_kinds` field does not apply since the only task kind
+/// that is subject to the throttle is that of the page service.
+///
+/// However, we don't want to make a breaking config change right now
+/// because it means we have to migrate all the tenant configs.
+/// This will be done in a future PR.
+///
+/// In the meantime, we use emptiness / non-emptiness of the `task_kinds`
+/// field to determine if the throttle is enabled or not.
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+#[serde(transparent)]
+pub struct ThrottleConfigTaskKinds(Vec<String>);
+
+impl ThrottleConfigTaskKinds {
+    pub fn disabled() -> Self {
+        Self(vec![])
+    }
+    pub fn is_enabled(&self) -> bool {
+        !self.0.is_empty()
+    }
+}
+
 impl ThrottleConfig {
     pub fn disabled() -> Self {
         Self {
-            task_kinds: vec![], // effectively disables the throttle
+            enabled: ThrottleConfigTaskKinds::disabled(), // other values don't matter with empty `task_kinds`.
             initial: 0,
             refill_interval: Duration::from_millis(1),
@@ -526,6 +556,30 @@ impl ThrottleConfig {
     }
 }

+#[cfg(test)]
+mod throttle_config_tests {
+    use super::*;
+
+    #[test]
+    fn test_disabled_is_disabled() {
+        let config = ThrottleConfig::disabled();
+        assert!(!config.enabled.is_enabled());
+    }
+    #[test]
+    fn test_enabled_backwards_compat() {
+        let input = serde_json::json!({
+            "task_kinds": ["PageRequestHandler"],
+            "initial": 40000,
+            "refill_interval": "50ms",
+            "refill_amount": 1000,
+            "max": 40000,
+            "fair": true
+        });
+        let config: ThrottleConfig = serde_json::from_value(input).unwrap();
+        assert!(config.enabled.is_enabled());
+    }
+}
+
 /// A flattened analog of a `pagesever::tenant::LocationMode`, which
 /// lists out all possible states (and the virtual "Detached" state)
 /// in a flat form rather than using rust-style enums.
diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs
index e83cf4c855a1..cf0cd3a46b88 100644
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -158,7 +158,8 @@ impl ShardIdentity {
         key_to_shard_number(self.count, self.stripe_size, key)
     }

-    /// Return true if the key should be ingested by this shard
+    /// Return true if the key is stored only on this shard. This does not include
+    /// global keys, see is_key_global().
     ///
     /// Shards must ingest _at least_ keys which return true from this check.
     pub fn is_key_local(&self, key: &Key) -> bool {
@@ -170,19 +171,37 @@ impl ShardIdentity {
         }
     }

+    /// Return true if the key should be stored on all shards, not just one.
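The `is_key_global` implementation that follows encodes the shard-placement rules for every key class. As a condensed restatement of those rules (a hedged sketch, not the real `ShardIdentity` API; the booleans stand in for the `Key::is_*` predicates):

```rust
// Condensed decision table for where a Key lives, mirroring the hunk below.
#[derive(Debug)]
enum Placement {
    Shard0Only,  // SLRU blocks/segment sizes and aux files
    OwningShard, // ordinary relation blocks, striped across shards
    AllShards,   // rel size keys and anything not yet shard-aware
}

fn classify(is_slru_or_aux: bool, is_rel_block: bool, is_rel_size: bool) -> Placement {
    if is_slru_or_aux {
        Placement::Shard0Only
    } else if is_rel_block {
        Placement::OwningShard
    } else if is_rel_size {
        // Every shard keeps rel sizes; only shard 0 keeps them strictly accurate.
        Placement::AllShards
    } else {
        // Default to "everywhere": ingest code may not be fully shard-aware yet.
        Placement::AllShards
    }
}

fn main() {
    // A relation block belongs to exactly one shard, so is_key_global() is false for it.
    println!("{:?}", classify(false, true, false)); // OwningShard
}
```

`is_key_disposable` then falls out of this: a sharded tenant may drop anything that is neither global nor local to it, which is exactly what the new unsharded fast path below short-circuits.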
+    pub fn is_key_global(&self, key: &Key) -> bool {
+        if key.is_slru_block_key() || key.is_slru_segment_size_key() || key.is_aux_file_key() {
+            // Special keys that are only stored on shard 0
+            false
+        } else if key.is_rel_block_key() {
+            // Ordinary relation blocks are distributed across shards
+            false
+        } else if key.is_rel_size_key() {
+            // All shards maintain rel size keys (although only shard 0 is responsible for
+            // keeping it strictly accurate, other shards just reflect the highest block they've ingested)
+            true
+        } else {
+            // For everything else, we assume it must be kept everywhere, because ingest code
+            // might assume this -- this covers functionality where the ingest code has
+            // not (yet) been made fully shard aware.
+            true
+        }
+    }
+
     /// Return true if the key should be discarded if found in this shard's
     /// data store, e.g. during compaction after a split.
     ///
     /// Shards _may_ drop keys which return false here, but are not obliged to.
     pub fn is_key_disposable(&self, key: &Key) -> bool {
-        if key_is_shard0(key) {
-            // Q: Why can't we dispose of shard0 content if we're not shard 0?
-            // A1: because the WAL ingestion logic currently ingests some shard 0
-            // content on all shards, even though it's only read on shard 0. If we
-            // dropped it, then subsequent WAL ingest to these keys would encounter
-            // an error.
-            // A2: because key_is_shard0 also covers relation size keys, which are written
-            // on all shards even though they're only maintained accurately on shard 0.
+        if self.count < ShardCount(2) {
+            // Fast path: unsharded tenant doesn't dispose of anything
+            return false;
+        }
+
+        if self.is_key_global(key) {
             false
         } else {
             !self.is_key_local(key)
diff --git a/libs/pq_proto/src/lib.rs b/libs/pq_proto/src/lib.rs
index 4b0331999d33..94714359a3d8 100644
--- a/libs/pq_proto/src/lib.rs
+++ b/libs/pq_proto/src/lib.rs
@@ -100,7 +100,7 @@ impl StartupMessageParamsBuilder {

 #[derive(Debug, Clone, Default)]
 pub struct StartupMessageParams {
-    params: Bytes,
+    pub params: Bytes,
 }

 impl StartupMessageParams {
@@ -565,6 +565,8 @@ pub enum BeMessage<'a> {
     /// Batch of interpreted, shard filtered WAL records,
     /// ready for the pageserver to ingest
     InterpretedWalRecords(InterpretedWalRecordsBody<'a>),
+
+    Raw(u8, &'a [u8]),
 }

 /// Common shorthands.
@@ -754,6 +756,10 @@ impl BeMessage<'_> {
     /// one more buffer.
     pub fn write(buf: &mut BytesMut, message: &BeMessage) -> Result<(), ProtocolError> {
         match message {
+            BeMessage::Raw(code, data) => {
+                buf.put_u8(*code);
+                write_body(buf, |b| b.put_slice(data))
+            }
             BeMessage::AuthenticationOk => {
                 buf.put_u8(b'R');
                 write_body(buf, |buf| {
diff --git a/libs/proxy/postgres-protocol2/Cargo.toml b/libs/proxy/postgres-protocol2/Cargo.toml
index 284a632954fd..f71c1599c7c2 100644
--- a/libs/proxy/postgres-protocol2/Cargo.toml
+++ b/libs/proxy/postgres-protocol2/Cargo.toml
@@ -10,7 +10,6 @@ byteorder.workspace = true
 bytes.workspace = true
 fallible-iterator.workspace = true
 hmac.workspace = true
-md-5 = "0.10"
 memchr = "2.0"
 rand.workspace = true
 sha2.workspace = true
diff --git a/libs/proxy/postgres-protocol2/src/authentication/mod.rs b/libs/proxy/postgres-protocol2/src/authentication/mod.rs
index 71afa4b9b60a..0bdc177143fb 100644
--- a/libs/proxy/postgres-protocol2/src/authentication/mod.rs
+++ b/libs/proxy/postgres-protocol2/src/authentication/mod.rs
@@ -1,37 +1,2 @@
 //! Authentication protocol support.
-use md5::{Digest, Md5};
-
 pub mod sasl;
-
-/// Hashes authentication information in a way suitable for use in response
-/// to an `AuthenticationMd5Password` message.
-///
-/// The resulting string should be sent back to the database in a
-/// `PasswordMessage` message.
-#[inline]
-pub fn md5_hash(username: &[u8], password: &[u8], salt: [u8; 4]) -> String {
-    let mut md5 = Md5::new();
-    md5.update(password);
-    md5.update(username);
-    let output = md5.finalize_reset();
-    md5.update(format!("{:x}", output));
-    md5.update(salt);
-    format!("md5{:x}", md5.finalize())
-}
-
-#[cfg(test)]
-mod test {
-    use super::*;
-
-    #[test]
-    fn md5() {
-        let username = b"md5_user";
-        let password = b"password";
-        let salt = [0x2a, 0x3d, 0x8f, 0xe0];
-
-        assert_eq!(
-            md5_hash(username, password, salt),
-            "md562af4dd09bbb41884907a838a3233294"
-        );
-    }
-}
diff --git a/libs/proxy/postgres-protocol2/src/authentication/sasl.rs b/libs/proxy/postgres-protocol2/src/authentication/sasl.rs
index 19aa3c1e9aaa..f2200a40ce59 100644
--- a/libs/proxy/postgres-protocol2/src/authentication/sasl.rs
+++ b/libs/proxy/postgres-protocol2/src/authentication/sasl.rs
@@ -117,7 +117,7 @@ enum Credentials {
     /// A regular password as a vector of bytes.
     Password(Vec<u8>),
     /// A precomputed pair of keys.
-    Keys(Box<ScramKeys<32>>),
+    Keys(ScramKeys<32>),
 }

 enum State {
@@ -176,7 +176,7 @@ impl ScramSha256 {

     /// Constructs a new instance which will use the provided key pair for authentication.
     pub fn new_with_keys(keys: ScramKeys<32>, channel_binding: ChannelBinding) -> ScramSha256 {
-        let password = Credentials::Keys(keys.into());
+        let password = Credentials::Keys(keys);
         ScramSha256::new_inner(password, channel_binding, nonce())
     }
diff --git a/libs/proxy/postgres-protocol2/src/message/backend.rs b/libs/proxy/postgres-protocol2/src/message/backend.rs
index 356d142f3fc8..097964f9c110 100644
--- a/libs/proxy/postgres-protocol2/src/message/backend.rs
+++ b/libs/proxy/postgres-protocol2/src/message/backend.rs
@@ -79,7 +79,7 @@ pub enum Message {
     AuthenticationCleartextPassword,
     AuthenticationGss,
     AuthenticationKerberosV5,
-    AuthenticationMd5Password(AuthenticationMd5PasswordBody),
+    AuthenticationMd5Password,
     AuthenticationOk,
     AuthenticationScmCredential,
     AuthenticationSspi,
@@ -191,11 +191,7 @@ impl Message {
             0 => Message::AuthenticationOk,
             2 => Message::AuthenticationKerberosV5,
             3 => Message::AuthenticationCleartextPassword,
-            5 => {
-                let mut salt = [0; 4];
-                buf.read_exact(&mut salt)?;
-                Message::AuthenticationMd5Password(AuthenticationMd5PasswordBody { salt })
-            }
+            5 => Message::AuthenticationMd5Password,
             6 => Message::AuthenticationScmCredential,
             7 => Message::AuthenticationGss,
             8 => Message::AuthenticationGssContinue,
@@ -541,6 +537,10 @@ impl NoticeResponseBody {
     pub fn fields(&self) -> ErrorFields<'_> {
         ErrorFields { buf: &self.storage }
     }
+
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.storage
+    }
 }

 pub struct NotificationResponseBody {
diff --git a/libs/proxy/postgres-protocol2/src/message/frontend.rs b/libs/proxy/postgres-protocol2/src/message/frontend.rs
index 5d0a8ff8c838..bc6168f33732 100644
--- a/libs/proxy/postgres-protocol2/src/message/frontend.rs
+++ b/libs/proxy/postgres-protocol2/src/message/frontend.rs
@@ -255,22 +255,34 @@ pub fn ssl_request(buf: &mut BytesMut) {
 }

 #[inline]
-pub fn startup_message<'a, I>(parameters: I, buf: &mut BytesMut) -> io::Result<()>
-where
-    I: IntoIterator<Item = (&'a str, &'a str)>,
-{
+pub fn startup_message(parameters: &StartupMessageParams, buf: &mut BytesMut) -> io::Result<()> {
     write_body(buf, |buf| {
         // postgres protocol version 3.0(196608) in bigger-endian
         buf.put_i32(0x00_03_00_00);
-        for (key, value) in parameters {
-            write_cstr(key.as_bytes(), buf)?;
-            write_cstr(value.as_bytes(), buf)?;
-        }
+        buf.put_slice(&parameters.params);
         buf.put_u8(0);
         Ok(())
     })
 }

+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub struct StartupMessageParams {
+    pub params: BytesMut,
+}
+
+impl StartupMessageParams {
+    /// Set parameter's value by its name.
+    pub fn insert(&mut self, name: &str, value: &str) {
+        if name.contains('\0') || value.contains('\0') {
+            panic!("startup parameter name or value contained a null")
+        }
+        self.params.put_slice(name.as_bytes());
+        self.params.put_u8(0);
+        self.params.put_slice(value.as_bytes());
+        self.params.put_u8(0);
+    }
+}
+
 #[inline]
 pub fn sync(buf: &mut BytesMut) {
     buf.put_u8(b'S');
diff --git a/libs/proxy/postgres-protocol2/src/password/mod.rs b/libs/proxy/postgres-protocol2/src/password/mod.rs
index e669e80f3f22..38eb31dfcf99 100644
--- a/libs/proxy/postgres-protocol2/src/password/mod.rs
+++ b/libs/proxy/postgres-protocol2/src/password/mod.rs
@@ -8,7 +8,6 @@

 use crate::authentication::sasl;
 use hmac::{Hmac, Mac};
-use md5::Md5;
 use rand::RngCore;
 use sha2::digest::FixedOutput;
 use sha2::{Digest, Sha256};
@@ -88,20 +87,3 @@ pub(crate) async fn scram_sha_256_salt(
         base64::encode(server_key)
     )
 }
-
-/// **Not recommended, as MD5 is not considered to be secure.**
-///
-/// Hash password using MD5 with the username as the salt.
-///
-/// The client may assume the returned string doesn't contain any
-/// special characters that would require escaping.
-pub fn md5(password: &[u8], username: &str) -> String {
-    // salt password with username
-    let mut salted_password = Vec::from(password);
-    salted_password.extend_from_slice(username.as_bytes());
-
-    let mut hash = Md5::new();
-    hash.update(&salted_password);
-    let digest = hash.finalize();
-    format!("md5{:x}", digest)
-}
diff --git a/libs/proxy/postgres-protocol2/src/password/test.rs b/libs/proxy/postgres-protocol2/src/password/test.rs
index c9d340f09d80..0692c07adbb1 100644
--- a/libs/proxy/postgres-protocol2/src/password/test.rs
+++ b/libs/proxy/postgres-protocol2/src/password/test.rs
@@ -9,11 +9,3 @@ async fn test_encrypt_scram_sha_256() {
         "SCRAM-SHA-256$4096:AQIDBAUGBwgJCgsMDQ4PEA==$8rrDg00OqaiWXJ7p+sCgHEIaBSHY89ZJl3mfIsf32oY=:05L1f+yZbiN8O0AnO40Og85NNRhvzTS57naKRWCcsIA="
     );
 }
-
-#[test]
-fn test_encrypt_md5() {
-    assert_eq!(
-        password::md5(b"secret", "foo"),
-        "md54ab2c5d00339c4b2a4e921d2dc4edec7"
-    );
-}
diff --git a/libs/proxy/tokio-postgres2/src/cancel_token.rs b/libs/proxy/tokio-postgres2/src/cancel_token.rs
index b949bf358f37..a10e8bf5c3a4 100644
--- a/libs/proxy/tokio-postgres2/src/cancel_token.rs
+++ b/libs/proxy/tokio-postgres2/src/cancel_token.rs
@@ -10,10 +10,10 @@ use tokio::net::TcpStream;
 /// connection.
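Making these fields `pub` (together with the now-public `SocketConfig` in the next hunk) lets the proxy synthesize a `CancelToken` from state it tracks itself, rather than only obtaining one from a live `Client`. A hedged sketch of what construction might look like after this change (module paths and field values are assumptions):

```rust
use tokio_postgres2::{CancelToken, SocketConfig};
use tokio_postgres2::config::SslMode;

// Hypothetical: a proxy that recorded the backend's process id and secret key
// at connection time can later build a token to issue an out-of-band
// CancelRequest, without holding on to the original Client.
fn make_cancel_token(
    socket_config: SocketConfig,
    process_id: i32,
    secret_key: i32,
) -> CancelToken {
    CancelToken {
        socket_config: Some(socket_config),
        ssl_mode: SslMode::Prefer,
        process_id,
        secret_key,
    }
}
```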
 #[derive(Clone)]
 pub struct CancelToken {
-    pub(crate) socket_config: Option<SocketConfig>,
-    pub(crate) ssl_mode: SslMode,
-    pub(crate) process_id: i32,
-    pub(crate) secret_key: i32,
+    pub socket_config: Option<SocketConfig>,
+    pub ssl_mode: SslMode,
+    pub process_id: i32,
+    pub secret_key: i32,
 }

 impl CancelToken {
diff --git a/libs/proxy/tokio-postgres2/src/client.rs b/libs/proxy/tokio-postgres2/src/client.rs
index 96200b71e73b..a7cd53afc35d 100644
--- a/libs/proxy/tokio-postgres2/src/client.rs
+++ b/libs/proxy/tokio-postgres2/src/client.rs
@@ -138,7 +138,7 @@ impl InnerClient {
 }

 #[derive(Clone)]
-pub(crate) struct SocketConfig {
+pub struct SocketConfig {
     pub host: Host,
     pub port: u16,
     pub connect_timeout: Option<Duration>,
@@ -152,7 +152,7 @@ pub(crate) struct SocketConfig {
 pub struct Client {
     inner: Arc<InnerClient>,

-    socket_config: Option<SocketConfig>,
+    socket_config: SocketConfig,
     ssl_mode: SslMode,
     process_id: i32,
     secret_key: i32,
@@ -161,6 +161,7 @@ pub struct Client {
 impl Client {
     pub(crate) fn new(
         sender: mpsc::UnboundedSender<Request>,
+        socket_config: SocketConfig,
         ssl_mode: SslMode,
         process_id: i32,
         secret_key: i32,
@@ -172,7 +173,7 @@ impl Client {
                 buffer: Default::default(),
             }),

-            socket_config: None,
+            socket_config,
             ssl_mode,
             process_id,
             secret_key,
@@ -188,10 +189,6 @@ impl Client {
         &self.inner
     }

-    pub(crate) fn set_socket_config(&mut self, socket_config: SocketConfig) {
-        self.socket_config = Some(socket_config);
-    }
-
     /// Creates a new prepared statement.
     ///
     /// Prepared statements can be executed repeatedly, and may contain query parameters (indicated by `$1`, `$2`, etc),
@@ -412,7 +409,7 @@ impl Client {
     /// connection associated with this client.
     pub fn cancel_token(&self) -> CancelToken {
         CancelToken {
-            socket_config: self.socket_config.clone(),
+            socket_config: Some(self.socket_config.clone()),
             ssl_mode: self.ssl_mode,
             process_id: self.process_id,
             secret_key: self.secret_key,
diff --git a/libs/proxy/tokio-postgres2/src/codec.rs b/libs/proxy/tokio-postgres2/src/codec.rs
index 7412db785b0a..0ec46198ce42 100644
--- a/libs/proxy/tokio-postgres2/src/codec.rs
+++ b/libs/proxy/tokio-postgres2/src/codec.rs
@@ -35,9 +35,7 @@ impl FallibleIterator for BackendMessages {
     }
 }

-pub struct PostgresCodec {
-    pub max_message_size: Option<usize>,
-}
+pub struct PostgresCodec;

 impl Encoder<FrontendMessage> for PostgresCodec {
     type Error = io::Error;
@@ -66,15 +64,6 @@ impl Decoder for PostgresCodec {
                 break;
             }

-            if let Some(max) = self.max_message_size {
-                if len > max {
-                    return Err(io::Error::new(
-                        io::ErrorKind::InvalidInput,
-                        "message too large",
-                    ));
-                }
-            }
-
             match header.tag() {
                 backend::NOTICE_RESPONSE_TAG
                 | backend::NOTIFICATION_RESPONSE_TAG
diff --git a/libs/proxy/tokio-postgres2/src/config.rs b/libs/proxy/tokio-postgres2/src/config.rs
index 969c20ba47e2..11a361a81b66 100644
--- a/libs/proxy/tokio-postgres2/src/config.rs
+++ b/libs/proxy/tokio-postgres2/src/config.rs
@@ -2,29 +2,19 @@

 use crate::connect::connect;
 use crate::connect_raw::connect_raw;
+use crate::connect_raw::RawConnection;
 use crate::tls::MakeTlsConnect;
 use crate::tls::TlsConnect;
 use crate::{Client, Connection, Error};
-use std::borrow::Cow;
+use postgres_protocol2::message::frontend::StartupMessageParams;
+use std::fmt;
 use std::str;
-use std::str::FromStr;
 use std::time::Duration;
-use std::{error, fmt, iter, mem};
 use tokio::io::{AsyncRead, AsyncWrite};

 pub use postgres_protocol2::authentication::sasl::ScramKeys;
 use tokio::net::TcpStream;

-/// Properties required of a session.
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-#[non_exhaustive]
-pub enum TargetSessionAttrs {
-    /// No special properties are required.
-    Any,
-    /// The session must allow writes.
-    ReadWrite,
-}
-
 /// TLS configuration.
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 #[non_exhaustive]
@@ -74,119 +64,37 @@ pub enum AuthKeys {
 }

 /// Connection configuration.
-///
-/// Configuration can be parsed from libpq-style connection strings. These strings come in two formats:
-///
-/// # Key-Value
-///
-/// This format consists of space-separated key-value pairs. Values which are either the empty string or contain
-/// whitespace should be wrapped in `'`. `'` and `\` characters should be backslash-escaped.
-///
-/// ## Keys
-///
-/// * `user` - The username to authenticate with. Required.
-/// * `password` - The password to authenticate with.
-/// * `dbname` - The name of the database to connect to. Defaults to the username.
-/// * `options` - Command line options used to configure the server.
-/// * `application_name` - Sets the `application_name` parameter on the server.
-/// * `sslmode` - Controls usage of TLS. If set to `disable`, TLS will not be used. If set to `prefer`, TLS will be used
-///     if available, but not used otherwise. If set to `require`, TLS will be forced to be used. Defaults to `prefer`.
-/// * `host` - The host to connect to. On Unix platforms, if the host starts with a `/` character it is treated as the
-///     path to the directory containing Unix domain sockets. Otherwise, it is treated as a hostname. Multiple hosts
-///     can be specified, separated by commas. Each host will be tried in turn when connecting. Required if connecting
-///     with the `connect` method.
-/// * `port` - The port to connect to. Multiple ports can be specified, separated by commas. The number of ports must be
-///     either 1, in which case it will be used for all hosts, or the same as the number of hosts. Defaults to 5432 if
-///     omitted or the empty string.
-/// * `connect_timeout` - The time limit in seconds applied to each socket-level connection attempt. Note that hostnames
-///     can resolve to multiple IP addresses, and this limit is applied to each address. Defaults to no timeout.
-/// * `target_session_attrs` - Specifies requirements of the session. If set to `read-write`, the client will check that
-///     the `transaction_read_write` session parameter is set to `on`. This can be used to connect to the primary server
-///     in a database cluster as opposed to the secondary read-only mirrors. Defaults to `all`.
-/// * `channel_binding` - Controls usage of channel binding in the authentication process. If set to `disable`, channel
-///     binding will not be used. If set to `prefer`, channel binding will be used if available, but not used otherwise.
-///     If set to `require`, the authentication process will fail if channel binding is not used. Defaults to `prefer`.
-///
-/// ## Examples
-///
-/// ```not_rust
-/// host=localhost user=postgres connect_timeout=10 keepalives=0
-/// ```
-///
-/// ```not_rust
-/// host=/var/lib/postgresql,localhost port=1234 user=postgres password='password with spaces'
-/// ```
-///
-/// ```not_rust
-/// host=host1,host2,host3 port=1234,,5678 user=postgres target_session_attrs=read-write
-/// ```
-///
-/// # Url
-///
-/// This format resembles a URL with a scheme of either `postgres://` or `postgresql://`. All components are optional,
-/// and the format accepts query parameters for all of the key-value pairs described in the section above. Multiple
-/// host/port pairs can be comma-separated. Unix socket paths in the host section of the URL should be percent-encoded,
-/// as the path component of the URL specifies the database name.
-///
-/// ## Examples
-///
-/// ```not_rust
-/// postgresql://user@localhost
-/// ```
-///
-/// ```not_rust
-/// postgresql://user:password@%2Fvar%2Flib%2Fpostgresql/mydb?connect_timeout=10
-/// ```
-///
-/// ```not_rust
-/// postgresql://user@host1:1234,host2,host3:5678?target_session_attrs=read-write
-/// ```
-///
-/// ```not_rust
-/// postgresql:///mydb?user=user&host=/var/lib/postgresql
-/// ```
 #[derive(Clone, PartialEq, Eq)]
 pub struct Config {
-    pub(crate) user: Option<String>,
+    pub(crate) host: Host,
+    pub(crate) port: u16,
+
     pub(crate) password: Option<Vec<u8>>,
     pub(crate) auth_keys: Option<Box<AuthKeys>>,
-    pub(crate) dbname: Option<String>,
-    pub(crate) options: Option<String>,
-    pub(crate) application_name: Option<String>,
     pub(crate) ssl_mode: SslMode,
-    pub(crate) host: Vec<Host>,
-    pub(crate) port: Vec<u16>,
     pub(crate) connect_timeout: Option<Duration>,
-    pub(crate) target_session_attrs: TargetSessionAttrs,
     pub(crate) channel_binding: ChannelBinding,
-    pub(crate) replication_mode: Option<ReplicationMode>,
-    pub(crate) max_backend_message_size: Option<usize>,
-}
+    pub(crate) server_params: StartupMessageParams,

-impl Default for Config {
-    fn default() -> Config {
-        Config::new()
-    }
+    database: bool,
+    username: bool,
 }

 impl Config {
     /// Creates a new configuration.
-    pub fn new() -> Config {
+    pub fn new(host: String, port: u16) -> Config {
         Config {
-            user: None,
+            host: Host::Tcp(host),
+            port,
             password: None,
             auth_keys: None,
-            dbname: None,
-            options: None,
-            application_name: None,
             ssl_mode: SslMode::Prefer,
-            host: vec![],
-            port: vec![],
             connect_timeout: None,
-            target_session_attrs: TargetSessionAttrs::Any,
             channel_binding: ChannelBinding::Prefer,
-            replication_mode: None,
-            max_backend_message_size: None,
+            server_params: StartupMessageParams::default(),
+
+            database: false,
+            username: false,
         }
     }

@@ -194,14 +102,13 @@ impl Config {
     ///
     /// Required.
     pub fn user(&mut self, user: &str) -> &mut Config {
-        self.user = Some(user.to_string());
-        self
+        self.set_param("user", user)
     }

     /// Gets the user to authenticate with, if one has been configured with
     /// the `user` method.
-    pub fn get_user(&self) -> Option<&str> {
-        self.user.as_deref()
+    pub fn user_is_set(&self) -> bool {
+        self.username
     }

     /// Sets the password to authenticate with.
@@ -237,40 +144,26 @@ impl Config {
     ///
     /// Defaults to the user.
     pub fn dbname(&mut self, dbname: &str) -> &mut Config {
-        self.dbname = Some(dbname.to_string());
-        self
+        self.set_param("database", dbname)
     }

     /// Gets the name of the database to connect to, if one has been configured
     /// with the `dbname` method.
-    pub fn get_dbname(&self) -> Option<&str> {
-        self.dbname.as_deref()
-    }
-
-    /// Sets command line options used to configure the server.
-    pub fn options(&mut self, options: &str) -> &mut Config {
-        self.options = Some(options.to_string());
-        self
+    pub fn db_is_set(&self) -> bool {
+        self.database
     }

-    /// Gets the command line options used to configure the server, if the
-    /// options have been set with the `options` method.
-    pub fn get_options(&self) -> Option<&str> {
-        self.options.as_deref()
-    }
+    pub fn set_param(&mut self, name: &str, value: &str) -> &mut Config {
+        if name == "database" {
+            self.database = true;
+        } else if name == "user" {
+            self.username = true;
+        }

-    /// Sets the value of the `application_name` runtime parameter.
-    pub fn application_name(&mut self, application_name: &str) -> &mut Config {
-        self.application_name = Some(application_name.to_string());
+        self.server_params.insert(name, value);
         self
     }

-    /// Gets the value of the `application_name` runtime parameter, if it has
-    /// been set with the `application_name` method.
-    pub fn get_application_name(&self) -> Option<&str> {
-        self.application_name.as_deref()
-    }
-
     /// Sets the SSL configuration.
     ///
     /// Defaults to `prefer`.
@@ -284,32 +177,14 @@ impl Config {
         self.ssl_mode
     }

-    /// Adds a host to the configuration.
-    ///
-    /// Multiple hosts can be specified by calling this method multiple times, and each will be tried in order.
-    pub fn host(&mut self, host: &str) -> &mut Config {
-        self.host.push(Host::Tcp(host.to_string()));
-        self
-    }
-
     /// Gets the hosts that have been added to the configuration with `host`.
-    pub fn get_hosts(&self) -> &[Host] {
+    pub fn get_host(&self) -> &Host {
         &self.host
     }

-    /// Adds a port to the configuration.
-    ///
-    /// Multiple ports can be specified by calling this method multiple times. There must either be no ports, in which
-    /// case the default of 5432 is used, a single port, in which it is used for all hosts, or the same number of ports
-    /// as hosts.
-    pub fn port(&mut self, port: u16) -> &mut Config {
-        self.port.push(port);
-        self
-    }
-
     /// Gets the ports that have been added to the configuration with `port`.
-    pub fn get_ports(&self) -> &[u16] {
-        &self.port
+    pub fn get_port(&self) -> u16 {
+        self.port
     }

     /// Sets the timeout applied to socket-level connection attempts.
@@ -327,23 +202,6 @@ impl Config {
         self.connect_timeout.as_ref()
     }

-    /// Sets the requirements of the session.
-    ///
-    /// This can be used to connect to the primary server in a clustered database rather than one of the read-only
-    /// secondary servers. Defaults to `Any`.
-    pub fn target_session_attrs(
-        &mut self,
-        target_session_attrs: TargetSessionAttrs,
-    ) -> &mut Config {
-        self.target_session_attrs = target_session_attrs;
-        self
-    }
-
-    /// Gets the requirements of the session.
-    pub fn get_target_session_attrs(&self) -> TargetSessionAttrs {
-        self.target_session_attrs
-    }
-
     /// Sets the channel binding behavior.
     ///
     /// Defaults to `prefer`.
@@ -357,121 +215,6 @@ impl Config {
         self.channel_binding
     }

-    /// Set replication mode.
-    pub fn replication_mode(&mut self, replication_mode: ReplicationMode) -> &mut Config {
-        self.replication_mode = Some(replication_mode);
-        self
-    }
-
-    /// Get replication mode.
-    pub fn get_replication_mode(&self) -> Option<ReplicationMode> {
-        self.replication_mode
-    }
-
-    /// Set limit for backend messages size.
-    pub fn max_backend_message_size(&mut self, max_backend_message_size: usize) -> &mut Config {
-        self.max_backend_message_size = Some(max_backend_message_size);
-        self
-    }
-
-    /// Get limit for backend messages size.
-    pub fn get_max_backend_message_size(&self) -> Option<usize> {
-        self.max_backend_message_size
-    }
-
-    fn param(&mut self, key: &str, value: &str) -> Result<(), Error> {
-        match key {
-            "user" => {
-                self.user(value);
-            }
-            "password" => {
-                self.password(value);
-            }
-            "dbname" => {
-                self.dbname(value);
-            }
-            "options" => {
-                self.options(value);
-            }
-            "application_name" => {
-                self.application_name(value);
-            }
-            "sslmode" => {
-                let mode = match value {
-                    "disable" => SslMode::Disable,
-                    "prefer" => SslMode::Prefer,
-                    "require" => SslMode::Require,
-                    _ => return Err(Error::config_parse(Box::new(InvalidValue("sslmode")))),
-                };
-                self.ssl_mode(mode);
-            }
-            "host" => {
-                for host in value.split(',') {
-                    self.host(host);
-                }
-            }
-            "port" => {
-                for port in value.split(',') {
-                    let port = if port.is_empty() {
-                        5432
-                    } else {
-                        port.parse()
-                            .map_err(|_| Error::config_parse(Box::new(InvalidValue("port"))))?
-                    };
-                    self.port(port);
-                }
-            }
-            "connect_timeout" => {
-                let timeout = value
-                    .parse::<i64>()
-                    .map_err(|_| Error::config_parse(Box::new(InvalidValue("connect_timeout"))))?;
-                if timeout > 0 {
-                    self.connect_timeout(Duration::from_secs(timeout as u64));
-                }
-            }
-            "target_session_attrs" => {
-                let target_session_attrs = match value {
-                    "any" => TargetSessionAttrs::Any,
-                    "read-write" => TargetSessionAttrs::ReadWrite,
-                    _ => {
-                        return Err(Error::config_parse(Box::new(InvalidValue(
-                            "target_session_attrs",
-                        ))));
-                    }
-                };
-                self.target_session_attrs(target_session_attrs);
-            }
-            "channel_binding" => {
-                let channel_binding = match value {
-                    "disable" => ChannelBinding::Disable,
-                    "prefer" => ChannelBinding::Prefer,
-                    "require" => ChannelBinding::Require,
-                    _ => {
-                        return Err(Error::config_parse(Box::new(InvalidValue(
-                            "channel_binding",
-                        ))))
-                    }
-                };
-                self.channel_binding(channel_binding);
-            }
-            "max_backend_message_size" => {
-                let limit = value.parse::<usize>().map_err(|_| {
-                    Error::config_parse(Box::new(InvalidValue("max_backend_message_size")))
-                })?;
-                if limit > 0 {
-                    self.max_backend_message_size(limit);
-                }
-            }
-            key => {
-                return Err(Error::config_parse(Box::new(UnknownOption(
-                    key.to_string(),
-                ))));
-            }
-        }
-
-        Ok(())
-    }
-
     /// Opens a connection to a PostgreSQL database.
     ///
     /// Requires the `runtime` Cargo feature (enabled by default).
@@ -485,14 +228,11 @@ impl Config {
         connect(tls, self).await
     }

-    /// Connects to a PostgreSQL database over an arbitrary stream.
-    ///
-    /// All of the settings other than `user`, `password`, `dbname`, `options`, and `application_name` name are ignored.
     pub async fn connect_raw<S, T>(
         &self,
         stream: S,
         tls: T,
-    ) -> Result<(Client, Connection<S, T::Stream>), Error>
+    ) -> Result<RawConnection<S, T::Stream>, Error>
     where
         S: AsyncRead + AsyncWrite + Unpin,
         T: TlsConnect<S>,
     {
     }
 }

-impl FromStr for Config {
-    type Err = Error;
-
-    fn from_str(s: &str) -> Result<Config, Error> {
-        match UrlParser::parse(s)? {
-            Some(config) => Ok(config),
-            None => Parser::parse(s),
-        }
-    }
-}
-
 // Omit password from debug output
 impl fmt::Debug for Config {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -523,375 +252,13 @@ impl fmt::Debug for Config {
         }

         f.debug_struct("Config")
-            .field("user", &self.user)
             .field("password", &self.password.as_ref().map(|_| Redaction {}))
-            .field("dbname", &self.dbname)
-            .field("options", &self.options)
-            .field("application_name", &self.application_name)
             .field("ssl_mode", &self.ssl_mode)
             .field("host", &self.host)
             .field("port", &self.port)
             .field("connect_timeout", &self.connect_timeout)
-            .field("target_session_attrs", &self.target_session_attrs)
             .field("channel_binding", &self.channel_binding)
-            .field("replication", &self.replication_mode)
+            .field("server_params", &self.server_params)
             .finish()
     }
 }
-
-#[derive(Debug)]
-struct UnknownOption(String);
-
-impl fmt::Display for UnknownOption {
-    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(fmt, "unknown option `{}`", self.0)
-    }
-}
-
-impl error::Error for UnknownOption {}
-
-#[derive(Debug)]
-struct InvalidValue(&'static str);
-
-impl fmt::Display for InvalidValue {
-    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(fmt, "invalid value for option `{}`", self.0)
-    }
-}
-
-impl error::Error for InvalidValue {}
-
-struct Parser<'a> {
-    s: &'a str,
-    it: iter::Peekable<str::CharIndices<'a>>,
-}
-
-impl<'a> Parser<'a> {
-    fn parse(s: &'a str) -> Result<Config, Error> {
-        let mut parser = Parser {
-            s,
-            it: s.char_indices().peekable(),
-        };
-
-        let mut config = Config::new();
-
-        while let Some((key, value)) = parser.parameter()? {
-            config.param(key, &value)?;
-        }
-
-        Ok(config)
-    }
-
-    fn skip_ws(&mut self) {
-        self.take_while(char::is_whitespace);
-    }
-
-    fn take_while<F>(&mut self, f: F) -> &'a str
-    where
-        F: Fn(char) -> bool,
-    {
-        let start = match self.it.peek() {
-            Some(&(i, _)) => i,
-            None => return "",
-        };
-
-        loop {
-            match self.it.peek() {
-                Some(&(_, c)) if f(c) => {
-                    self.it.next();
-                }
-                Some(&(i, _)) => return &self.s[start..i],
-                None => return &self.s[start..],
-            }
-        }
-    }
-
-    fn eat(&mut self, target: char) -> Result<(), Error> {
-        match self.it.next() {
-            Some((_, c)) if c == target => Ok(()),
-            Some((i, c)) => {
-                let m = format!(
-                    "unexpected character at byte {}: expected `{}` but got `{}`",
-                    i, target, c
-                );
-                Err(Error::config_parse(m.into()))
-            }
-            None => Err(Error::config_parse("unexpected EOF".into())),
-        }
-    }
-
-    fn eat_if(&mut self, target: char) -> bool {
-        match self.it.peek() {
-            Some(&(_, c)) if c == target => {
-                self.it.next();
-                true
-            }
-            _ => false,
-        }
-    }
-
-    fn keyword(&mut self) -> Option<&'a str> {
-        let s = self.take_while(|c| match c {
-            c if c.is_whitespace() => false,
-            '=' => false,
-            _ => true,
-        });
-
-        if s.is_empty() {
-            None
-        } else {
-            Some(s)
-        }
-    }
-
-    fn value(&mut self) -> Result<String, Error> {
-        let value = if self.eat_if('\'') {
-            let value = self.quoted_value()?;
-            self.eat('\'')?;
-            value
-        } else {
-            self.simple_value()?
-        };
-
-        Ok(value)
-    }
-
-    fn simple_value(&mut self) -> Result<String, Error> {
-        let mut value = String::new();
-
-        while let Some(&(_, c)) = self.it.peek() {
-            if c.is_whitespace() {
-                break;
-            }
-
-            self.it.next();
-            if c == '\\' {
-                if let Some((_, c2)) = self.it.next() {
-                    value.push(c2);
-                }
-            } else {
-                value.push(c);
-            }
-        }
-
-        if value.is_empty() {
-            return Err(Error::config_parse("unexpected EOF".into()));
-        }
-
-        Ok(value)
-    }
-
-    fn quoted_value(&mut self) -> Result<String, Error> {
-        let mut value = String::new();
-
-        while let Some(&(_, c)) = self.it.peek() {
-            if c == '\'' {
-                return Ok(value);
-            }
-
-            self.it.next();
-            if c == '\\' {
-                if let Some((_, c2)) = self.it.next() {
-                    value.push(c2);
-                }
-            } else {
-                value.push(c);
-            }
-        }
-
-        Err(Error::config_parse(
-            "unterminated quoted connection parameter value".into(),
-        ))
-    }
-
-    fn parameter(&mut self) -> Result<Option<(&'a str, String)>, Error> {
-        self.skip_ws();
-        let keyword = match self.keyword() {
-            Some(keyword) => keyword,
-            None => return Ok(None),
-        };
-        self.skip_ws();
-        self.eat('=')?;
-        self.skip_ws();
-        let value = self.value()?;
-
-        Ok(Some((keyword, value)))
-    }
-}
-
-// This is a pretty sloppy "URL" parser, but it matches the behavior of libpq, where things really aren't very strict
-struct UrlParser<'a> {
-    s: &'a str,
-    config: Config,
-}
-
-impl<'a> UrlParser<'a> {
-    fn parse(s: &'a str) -> Result<Option<Config>, Error> {
-        let s = match Self::remove_url_prefix(s) {
-            Some(s) => s,
-            None => return Ok(None),
-        };
-
-        let mut parser = UrlParser {
-            s,
-            config: Config::new(),
-        };
-
-        parser.parse_credentials()?;
-        parser.parse_host()?;
-        parser.parse_path()?;
-        parser.parse_params()?;
-
-        Ok(Some(parser.config))
-    }
-
-    fn remove_url_prefix(s: &str) -> Option<&str> {
-        for prefix in &["postgres://", "postgresql://"] {
-            if let Some(stripped) = s.strip_prefix(prefix) {
-                return Some(stripped);
-            }
-        }
-
-        None
-    }
-
-    fn take_until(&mut self, end: &[char]) -> Option<&'a str> {
-        match self.s.find(end) {
-            Some(pos) => {
-                let (head, tail) = self.s.split_at(pos);
-                self.s = tail;
-                Some(head)
-            }
-            None => None,
-        }
-    }
-
-    fn take_all(&mut self) -> &'a str {
-        mem::take(&mut self.s)
-    }
-
-    fn eat_byte(&mut self) {
-        self.s = &self.s[1..];
-    }
-
-    fn parse_credentials(&mut self) -> Result<(), Error> {
-        let creds = match self.take_until(&['@']) {
-            Some(creds) => creds,
-            None => return Ok(()),
-        };
-        self.eat_byte();
-
-        let mut it = creds.splitn(2, ':');
-        let user = self.decode(it.next().unwrap())?;
-        self.config.user(&user);
-
-        if let Some(password) = it.next() {
-            let password = Cow::from(percent_encoding::percent_decode(password.as_bytes()));
-            self.config.password(password);
-        }
-
-        Ok(())
-    }
-
-    fn parse_host(&mut self) -> Result<(), Error> {
-        let host = match self.take_until(&['/', '?']) {
-            Some(host) => host,
-            None => self.take_all(),
-        };
-
-        if host.is_empty() {
-            return Ok(());
-        }
-
-        for chunk in host.split(',') {
-            let (host, port) = if chunk.starts_with('[') {
-                let idx = match chunk.find(']') {
-                    Some(idx) => idx,
-                    None => return Err(Error::config_parse(InvalidValue("host").into())),
-                };
-
-                let host = &chunk[1..idx];
-                let remaining = &chunk[idx + 1..];
-                let port = if let Some(port) = remaining.strip_prefix(':') {
-                    Some(port)
-                } else if remaining.is_empty() {
-                    None
-                } else {
-                    return Err(Error::config_parse(InvalidValue("host").into()));
-                };
-
-                (host, port)
-            } else {
-                let mut it = chunk.splitn(2, ':');
-                (it.next().unwrap(), it.next())
-            };
-
-            self.host_param(host)?;
-            let port = self.decode(port.unwrap_or("5432"))?;
-            self.config.param("port", &port)?;
-        }
-
-        Ok(())
-    }
-
-    fn parse_path(&mut self) -> Result<(), Error> {
-        if !self.s.starts_with('/') {
-            return Ok(());
-        }
-        self.eat_byte();
-
-        let dbname = match self.take_until(&['?']) {
-            Some(dbname) => dbname,
-            None => self.take_all(),
-        };
-
-        if !dbname.is_empty() {
-            self.config.dbname(&self.decode(dbname)?);
-        }
-
-        Ok(())
-    }
-
-    fn parse_params(&mut self) -> Result<(), Error> {
-        if !self.s.starts_with('?') {
-            return Ok(());
-        }
-        self.eat_byte();
-
-        while !self.s.is_empty() {
-            let key = match self.take_until(&['=']) {
-                Some(key) => self.decode(key)?,
-                None => return Err(Error::config_parse("unterminated parameter".into())),
-            };
-            self.eat_byte();
-
-            let value = match self.take_until(&['&']) {
-                Some(value) => {
-                    self.eat_byte();
-                    value
-                }
-                None => self.take_all(),
-            };
-
-            if key == "host" {
-                self.host_param(value)?;
-            } else {
-                let value = self.decode(value)?;
-                self.config.param(&key, &value)?;
-            }
-        }
-
-        Ok(())
-    }
-
-    fn host_param(&mut self, s: &str) -> Result<(), Error> {
-        let s = self.decode(s)?;
-        self.config.param("host", &s)
-    }
-
-    fn decode(&self, s: &'a str) -> Result<Cow<'a, str>, Error> {
-        percent_encoding::percent_decode(s.as_bytes())
-            .decode_utf8()
-            .map_err(|e| Error::config_parse(e.into()))
-    }
-}
diff --git a/libs/proxy/tokio-postgres2/src/connect.rs b/libs/proxy/tokio-postgres2/src/connect.rs
index 7517fe0cdeb9..e0cb69748d50 100644
--- a/libs/proxy/tokio-postgres2/src/connect.rs
+++ b/libs/proxy/tokio-postgres2/src/connect.rs
@@ -1,13 +1,13 @@
 use crate::client::SocketConfig;
-use crate::config::{Host, TargetSessionAttrs};
+use crate::codec::BackendMessage;
+use crate::config::Host;
 use crate::connect_raw::connect_raw;
 use crate::connect_socket::connect_socket;
 use crate::tls::{MakeTlsConnect, TlsConnect};
-use crate::{Client, Config, Connection, Error, SimpleQueryMessage};
-use futures_util::{future, pin_mut, Future, FutureExt, Stream};
-use std::io;
-use std::task::Poll;
+use crate::{Client, Config, Connection, Error, RawConnection};
+use postgres_protocol2::message::backend::Message;
 use tokio::net::TcpStream;
+use tokio::sync::mpsc;

 pub async fn connect<T>(
     mut tls: T,
     config: &Config,
 )
 where
     T: MakeTlsConnect<TcpStream>,
 {
-    if config.host.is_empty() {
-        return Err(Error::config("host missing".into()));
-    }
-
-    if config.port.len() > 1 && config.port.len() != config.host.len() {
-        return Err(Error::config("invalid number of ports".into()));
-    }
-
-    let mut error = None;
-    for (i, host) in config.host.iter().enumerate() {
-        let port = config
-            .port
-            .get(i)
-            .or_else(|| config.port.first())
-            .copied()
-            .unwrap_or(5432);
+    let hostname = match &config.host {
+        Host::Tcp(host) => host.as_str(),
+    };

-        let hostname = match host {
-            Host::Tcp(host) => host.as_str(),
-        };
+    let tls = tls
+        .make_tls_connect(hostname)
+        .map_err(|e| Error::tls(e.into()))?;

-        let tls = tls
-            .make_tls_connect(hostname)
-            .map_err(|e| Error::tls(e.into()))?;
-
-        match connect_once(host, port, tls, config).await {
-            Ok((client, connection)) => return Ok((client, connection)),
-            Err(e) => error = Some(e),
-        }
+    match connect_once(&config.host, config.port, tls, config).await {
+        Ok((client, connection)) => Ok((client, connection)),
+        Err(e) => Err(e),
     }
-
-    Err(error.unwrap())
 }

 async fn connect_once<T>(
@@ -60,53 +40,36 @@
 where
     T: TlsConnect<TcpStream>,
 {
     let socket = connect_socket(host, port, config.connect_timeout).await?;
-    let (mut client, mut connection) = connect_raw(socket, tls, config).await?;
-
-    if let TargetSessionAttrs::ReadWrite = config.target_session_attrs {
-        let rows = client.simple_query_raw("SHOW transaction_read_only");
-        pin_mut!(rows);
-
-        let rows = future::poll_fn(|cx| {
-            if connection.poll_unpin(cx)?.is_ready() {
-                return Poll::Ready(Err(Error::closed()));
-            }
-
-            rows.as_mut().poll(cx)
-        })
-        .await?;
-        pin_mut!(rows);
-
-        loop {
-            let next = future::poll_fn(|cx| {
-                if connection.poll_unpin(cx)?.is_ready() {
-                    return Poll::Ready(Some(Err(Error::closed())));
-                }
-
-                rows.as_mut().poll_next(cx)
-            });
-
-            match next.await.transpose()? {
-                Some(SimpleQueryMessage::Row(row)) => {
-                    if row.try_get(0)? == Some("on") {
-                        return Err(Error::connect(io::Error::new(
-                            io::ErrorKind::PermissionDenied,
-                            "database does not allow writes",
-                        )));
-                    } else {
-                        break;
-                    }
-                }
-                Some(_) => {}
-                None => return Err(Error::unexpected_message()),
-            }
-        }
-    }
-
-    client.set_socket_config(SocketConfig {
+    let RawConnection {
+        stream,
+        parameters,
+        delayed_notice,
+        process_id,
+        secret_key,
+    } = connect_raw(socket, tls, config).await?;
+
+    let socket_config = SocketConfig {
         host: host.clone(),
         port,
         connect_timeout: config.connect_timeout,
-    });
+    };
+
+    let (sender, receiver) = mpsc::unbounded_channel();
+    let client = Client::new(
+        sender,
+        socket_config,
+        config.ssl_mode,
+        process_id,
+        secret_key,
+    );
+
+    // delayed notices are always sent as "Async" messages.
+    let delayed = delayed_notice
+        .into_iter()
+        .map(|m| BackendMessage::Async(Message::NoticeResponse(m)))
+        .collect();
+
+    let connection = Connection::new(stream, delayed, parameters, receiver);

     Ok((client, connection))
 }
diff --git a/libs/proxy/tokio-postgres2/src/connect_raw.rs b/libs/proxy/tokio-postgres2/src/connect_raw.rs
index 80677af969f6..66db85e07d24 100644
--- a/libs/proxy/tokio-postgres2/src/connect_raw.rs
+++ b/libs/proxy/tokio-postgres2/src/connect_raw.rs
@@ -1,29 +1,27 @@
 use crate::codec::{BackendMessage, BackendMessages, FrontendMessage, PostgresCodec};
-use crate::config::{self, AuthKeys, Config, ReplicationMode};
+use crate::config::{self, AuthKeys, Config};
 use crate::connect_tls::connect_tls;
 use crate::maybe_tls_stream::MaybeTlsStream;
 use crate::tls::{TlsConnect, TlsStream};
-use crate::{Client, Connection, Error};
+use crate::Error;
 use bytes::BytesMut;
 use fallible_iterator::FallibleIterator;
 use futures_util::{ready, Sink, SinkExt, Stream, TryStreamExt};
-use postgres_protocol2::authentication;
 use postgres_protocol2::authentication::sasl;
 use postgres_protocol2::authentication::sasl::ScramSha256;
-use postgres_protocol2::message::backend::{AuthenticationSaslBody, Message};
+use postgres_protocol2::message::backend::{AuthenticationSaslBody, Message, NoticeResponseBody};
 use postgres_protocol2::message::frontend;
-use std::collections::{HashMap, VecDeque};
+use std::collections::HashMap;
 use std::io;
 use std::pin::Pin;
 use std::task::{Context, Poll};
 use tokio::io::{AsyncRead, AsyncWrite};
-use tokio::sync::mpsc;
 use tokio_util::codec::Framed;

 pub struct StartupStream<S, T> {
     inner: Framed<MaybeTlsStream<S, T>, PostgresCodec>,
     buf: BackendMessages,
-    delayed: VecDeque<BackendMessage>,
+    delayed_notice: Vec<NoticeResponseBody>,
 }

 impl<S, T> Sink<FrontendMessage> for StartupStream<S, T>
 where
@@ -78,11 +76,19 @@
     }
 }

+pub struct RawConnection<S, T> {
+    pub stream: Framed<MaybeTlsStream<S, T>, PostgresCodec>,
+    pub parameters: HashMap<String, String>,
+    pub delayed_notice: Vec<NoticeResponseBody>,
+    pub process_id: i32,
+    pub secret_key: i32,
+}
+
 pub async fn connect_raw<S, T>(
     stream: S,
     tls: T,
     config: &Config,
-) -> Result<(Client, Connection<S, T::Stream>), Error>
+) -> Result<RawConnection<S, T::Stream>, Error>
 where
     S: AsyncRead + AsyncWrite + Unpin,
     T: TlsConnect<S>,
 {
     let stream = connect_tls(stream, config.ssl_mode, tls).await?;

     let mut stream = StartupStream {
-        inner: Framed::new(
-            stream,
-            PostgresCodec {
-                max_message_size: config.max_backend_message_size,
-            },
-        ),
+        inner: Framed::new(stream, PostgresCodec),
         buf: BackendMessages::empty(),
-        delayed: VecDeque::new(),
+        delayed_notice: Vec::new(),
     };

     startup(&mut stream, config).await?;
     authenticate(&mut stream, config).await?;
     let (process_id, secret_key, parameters) = read_info(&mut stream).await?;

-    let (sender, receiver) = mpsc::unbounded_channel();
-    let client = Client::new(sender, config.ssl_mode, process_id, secret_key);
-    let connection = Connection::new(stream.inner, stream.delayed, parameters, receiver);
-
-    Ok((client, connection))
+    Ok(RawConnection {
+        stream: stream.inner,
+        parameters,
+        delayed_notice: stream.delayed_notice,
+        process_id,
+        secret_key,
+    })
 }

 async fn startup<S, T>(stream: &mut StartupStream<S, T>, config: &Config) -> Result<(), Error>
@@ -116,28 +119,8 @@ where
     S: AsyncRead + AsyncWrite + Unpin,
     T: AsyncRead + AsyncWrite + Unpin,
 {
-    let mut params = vec![("client_encoding", "UTF8")];
-    if let Some(user) = &config.user {
-        params.push(("user", &**user));
-    }
-    if let Some(dbname) = &config.dbname {
-        params.push(("database", &**dbname));
-    }
-    if let Some(options) = &config.options {
-        params.push(("options", &**options));
-    }
-    if let Some(application_name) = &config.application_name {
-        params.push(("application_name", &**application_name));
-    }
-    if let Some(replication_mode) = &config.replication_mode {
-        match replication_mode {
-            ReplicationMode::Physical => params.push(("replication", "true")),
-            ReplicationMode::Logical => params.push(("replication", "database")),
-        }
-    }
-
     let mut buf = BytesMut::new();
-    frontend::startup_message(params, &mut buf).map_err(Error::encode)?;
+    frontend::startup_message(&config.server_params, &mut buf).map_err(Error::encode)?;

     stream
         .send(FrontendMessage::Raw(buf.freeze()))
@@ -165,25 +148,11 @@ where
             authenticate_password(stream, pass).await?;
         }
-        Some(Message::AuthenticationMd5Password(body)) => {
-            can_skip_channel_binding(config)?;
-
-            let user = config
-                .user
-                .as_ref()
-                .ok_or_else(|| Error::config("user missing".into()))?;
-            let pass = config
-                .password
-                .as_ref()
-                .ok_or_else(|| Error::config("password missing".into()))?;
-
-            let output = authentication::md5_hash(user.as_bytes(), pass, body.salt());
-            authenticate_password(stream, output.as_bytes()).await?;
-        }
         Some(Message::AuthenticationSasl(body)) => {
             authenticate_sasl(stream, body, config).await?;
         }
-        Some(Message::AuthenticationKerberosV5)
+        Some(Message::AuthenticationMd5Password)
+        | Some(Message::AuthenticationKerberosV5)
         | Some(Message::AuthenticationScmCredential)
         | Some(Message::AuthenticationGss)
        | Some(Message::AuthenticationSspi) => {
@@ -347,9 +316,7 @@ where
                 body.value().map_err(Error::parse)?.to_string(),
             );
         }
-        Some(msg @ Message::NoticeResponse(_)) => {
-            stream.delayed.push_back(BackendMessage::Async(msg))
-        }
+        Some(Message::NoticeResponse(body)) => stream.delayed_notice.push(body),
         Some(Message::ReadyForQuery(_)) => return Ok((process_id, secret_key, parameters)),
         Some(Message::ErrorResponse(body)) => return Err(Error::db(body)),
         Some(_) => return Err(Error::unexpected_message()),
diff --git a/libs/proxy/tokio-postgres2/src/error/mod.rs b/libs/proxy/tokio-postgres2/src/error/mod.rs
index 651432225009..922c348525c6 100644
--- a/libs/proxy/tokio-postgres2/src/error/mod.rs
+++ b/libs/proxy/tokio-postgres2/src/error/mod.rs
@@ -349,7 +349,6 @@ enum Kind {
     Parse,
     Encode,
     Authentication,
-    ConfigParse,
     Config,
     Connect,
     Timeout,
@@ -386,7 +385,6 @@ impl fmt::Display for Error {
             Kind::Parse => fmt.write_str("error parsing response from server")?,
             Kind::Encode => fmt.write_str("error encoding message to server")?,
             Kind::Authentication => fmt.write_str("authentication error")?,
-            Kind::ConfigParse => fmt.write_str("invalid connection string")?,
             Kind::Config => fmt.write_str("invalid configuration")?,
             Kind::Connect => fmt.write_str("error connecting to server")?,
             Kind::Timeout => fmt.write_str("timeout waiting for server")?,
@@ -482,10 +480,6 @@ impl Error {
         Error::new(Kind::Authentication, Some(e))
     }

-    pub(crate) fn config_parse(e: Box<dyn error::Error + Sync + Send>) -> Error {
-        Error::new(Kind::ConfigParse, Some(e))
-    }
-
     pub(crate) fn config(e: Box<dyn error::Error + Sync + Send>) -> Error {
         Error::new(Kind::Config, Some(e))
     }
diff --git a/libs/proxy/tokio-postgres2/src/lib.rs b/libs/proxy/tokio-postgres2/src/lib.rs
index 72ba8172b28e..901ed0c96c68 100644
--- a/libs/proxy/tokio-postgres2/src/lib.rs
+++ b/libs/proxy/tokio-postgres2/src/lib.rs
@@ -1,9 +1,10 @@
 //! An asynchronous, pipelined, PostgreSQL client.
-#![warn(rust_2018_idioms, clippy::all, missing_docs)]
+#![warn(rust_2018_idioms, clippy::all)]

 pub use crate::cancel_token::CancelToken;
-pub use crate::client::Client;
+pub use crate::client::{Client, SocketConfig};
 pub use crate::config::Config;
+pub use crate::connect_raw::RawConnection;
 pub use crate::connection::Connection;
 use crate::error::DbError;
 pub use crate::error::Error;
@@ -12,14 +13,12 @@ pub use crate::query::RowStream;
 pub use crate::row::{Row, SimpleQueryRow};
 pub use crate::simple_query::SimpleQueryStream;
 pub use crate::statement::{Column, Statement};
-use crate::tls::MakeTlsConnect;
 pub use crate::tls::NoTls;
 pub use crate::to_statement::ToStatement;
 pub use crate::transaction::Transaction;
 pub use crate::transaction_builder::{IsolationLevel, TransactionBuilder};
 use crate::types::ToSql;
 use postgres_protocol2::message::backend::ReadyForQueryBody;
-use tokio::net::TcpStream;

 /// After executing a query, the connection will be in one of these states
 #[derive(Clone, Copy, Debug, PartialEq)]
@@ -71,24 +70,6 @@ mod transaction;
 mod transaction_builder;
 pub mod types;

-/// A convenience function which parses a connection string and connects to the database.
-///
-/// See the documentation for [`Config`] for details on the connection string format.
-///
-/// Requires the `runtime` Cargo feature (enabled by default).
-///
-/// [`Config`]: config/struct.Config.html
-pub async fn connect<T>(
-    config: &str,
-    tls: T,
-) -> Result<(Client, Connection<TcpStream, T::Stream>), Error>
-where
-    T: MakeTlsConnect<TcpStream>,
-{
-    let config = config.parse::<Config>()?;
-    config.connect(tls).await
-}
-
 /// An asynchronous notification.
#[derive(Clone, Debug)] pub struct Notification { diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index 5648072a83c2..66500fb141bc 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -26,6 +26,7 @@ humantime.workspace = true hyper0 = { workspace = true, features = ["full"] } fail.workspace = true futures = { workspace = true} +jemalloc_pprof.workspace = true jsonwebtoken.workspace = true nix.workspace = true once_cell.workspace = true diff --git a/libs/utils/src/http/endpoint.rs b/libs/utils/src/http/endpoint.rs index 6a85f0ddeb26..d975b63677ac 100644 --- a/libs/utils/src/http/endpoint.rs +++ b/libs/utils/src/http/endpoint.rs @@ -10,6 +10,7 @@ use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder}; use once_cell::sync::Lazy; use routerify::ext::RequestExt; use routerify::{Middleware, RequestInfo, Router, RouterBuilder}; +use tokio_util::io::ReaderStream; use tracing::{debug, info, info_span, warn, Instrument}; use std::future::Future; @@ -407,6 +408,69 @@ pub async fn profile_cpu_handler(req: Request) -> Result, A } } +/// Generates heap profiles. +/// +/// This only works with jemalloc on Linux. +pub async fn profile_heap_handler(req: Request) -> Result, ApiError> { + enum Format { + Jemalloc, + Pprof, + } + + // Parameters. + let format = match get_query_param(&req, "format")?.as_deref() { + None => Format::Pprof, + Some("jemalloc") => Format::Jemalloc, + Some("pprof") => Format::Pprof, + Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))), + }; + + // Obtain profiler handle. + let mut prof_ctl = jemalloc_pprof::PROF_CTL + .as_ref() + .ok_or(ApiError::InternalServerError(anyhow!( + "heap profiling not enabled" + )))? + .lock() + .await; + if !prof_ctl.activated() { + return Err(ApiError::InternalServerError(anyhow!( + "heap profiling not enabled" + ))); + } + + // Take and return the profile. + match format { + Format::Jemalloc => { + // NB: file is an open handle to a tempfile that's already deleted. + let file = tokio::task::spawn_blocking(move || prof_ctl.dump()) + .await + .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? + .map_err(ApiError::InternalServerError)?; + let stream = ReaderStream::new(tokio::fs::File::from_std(file)); + Response::builder() + .status(200) + .header(CONTENT_TYPE, "application/octet-stream") + .header(CONTENT_DISPOSITION, "attachment; filename=\"heap.dump\"") + .body(Body::wrap_stream(stream)) + .map_err(|err| ApiError::InternalServerError(err.into())) + } + + Format::Pprof => { + let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof()) + .await + .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? 
+ .map_err(ApiError::InternalServerError)?; + Response::builder() + .status(200) + .header(CONTENT_TYPE, "application/octet-stream") + .header(CONTENT_DISPOSITION, "attachment; filename=\"heap.pb\"") + .body(Body::from(data)) + .map_err(|err| ApiError::InternalServerError(err.into())) + } + } +} + pub fn add_request_id_middleware( ) -> Middleware { Middleware::pre(move |req| async move { diff --git a/libs/utils/src/shard.rs b/libs/utils/src/shard.rs index 782cddc599b0..6352ea9f9253 100644 --- a/libs/utils/src/shard.rs +++ b/libs/utils/src/shard.rs @@ -164,6 +164,12 @@ impl TenantShardId { } } +impl std::fmt::Display for ShardNumber { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + impl std::fmt::Display for ShardSlug<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( diff --git a/libs/utils/src/sync.rs b/libs/utils/src/sync.rs index 7aa26e24bcc4..280637de8feb 100644 --- a/libs/utils/src/sync.rs +++ b/libs/utils/src/sync.rs @@ -1,5 +1,6 @@ pub mod heavier_once_cell; +pub mod duplex; pub mod gate; pub mod spsc_fold; diff --git a/libs/utils/src/sync/duplex.rs b/libs/utils/src/sync/duplex.rs new file mode 100644 index 000000000000..fac79297a086 --- /dev/null +++ b/libs/utils/src/sync/duplex.rs @@ -0,0 +1 @@ +pub mod mpsc; diff --git a/libs/utils/src/sync/duplex/mpsc.rs b/libs/utils/src/sync/duplex/mpsc.rs new file mode 100644 index 000000000000..56b4e6d2b331 --- /dev/null +++ b/libs/utils/src/sync/duplex/mpsc.rs @@ -0,0 +1,36 @@ +use tokio::sync::mpsc; + +/// A bi-directional channel. +pub struct Duplex { + pub tx: mpsc::Sender, + pub rx: mpsc::Receiver, +} + +/// Creates a bi-directional channel. +/// +/// The channel will buffer up to the provided number of messages. Once the buffer is full, +/// attempts to send new messages will wait until a message is received from the channel. +/// The provided buffer capacity must be at least 1. +pub fn channel(buffer: usize) -> (Duplex, Duplex) { + let (tx_a, rx_a) = mpsc::channel::(buffer); + let (tx_b, rx_b) = mpsc::channel::(buffer); + + (Duplex { tx: tx_a, rx: rx_b }, Duplex { tx: tx_b, rx: rx_a }) +} + +impl Duplex { + /// Sends a value, waiting until there is capacity. + /// + /// A successful send occurs when it is determined that the other end of the channel has not hung up already. + pub async fn send(&self, x: S) -> Result<(), mpsc::error::SendError> { + self.tx.send(x).await + } + + /// Receives the next value for this receiver. + /// + /// This method returns `None` if the channel has been closed and there are + /// no remaining messages in the channel's buffer. + pub async fn recv(&mut self) -> Option { + self.rx.recv().await + } +} diff --git a/libs/wal_decoder/src/decoder.rs b/libs/wal_decoder/src/decoder.rs index 36c4b19266aa..aa50c629113b 100644 --- a/libs/wal_decoder/src/decoder.rs +++ b/libs/wal_decoder/src/decoder.rs @@ -112,30 +112,38 @@ impl MetadataRecord { }; // Next, filter the metadata record by shard. - - // Route VM page updates to the shards that own them. VM pages are stored in the VM fork - // of the main relation. These are sharded and managed just like regular relation pages. 
-            // See: https://github.com/neondatabase/neon/issues/9855
-            if let Some(
-                MetadataRecord::Heapam(HeapamRecord::ClearVmBits(ref mut clear_vm_bits))
-                | MetadataRecord::Neonrmgr(NeonrmgrRecord::ClearVmBits(ref mut clear_vm_bits)),
-            ) = metadata_record
-            {
-                let is_local_vm_page = |heap_blk| {
-                    let vm_blk = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blk);
-                    shard.is_key_local(&rel_block_to_key(clear_vm_bits.vm_rel, vm_blk))
-                };
-                // Send the old and new VM page updates to their respective shards.
-                clear_vm_bits.old_heap_blkno = clear_vm_bits
-                    .old_heap_blkno
-                    .filter(|&blkno| is_local_vm_page(blkno));
-                clear_vm_bits.new_heap_blkno = clear_vm_bits
-                    .new_heap_blkno
-                    .filter(|&blkno| is_local_vm_page(blkno));
-                // If neither VM page belongs to this shard, discard the record.
-                if clear_vm_bits.old_heap_blkno.is_none() && clear_vm_bits.new_heap_blkno.is_none() {
-                    metadata_record = None
+        match metadata_record {
+            Some(
+                MetadataRecord::Heapam(HeapamRecord::ClearVmBits(ref mut clear_vm_bits))
+                | MetadataRecord::Neonrmgr(NeonrmgrRecord::ClearVmBits(ref mut clear_vm_bits)),
+            ) => {
+                // Route VM page updates to the shards that own them. VM pages are stored in the VM fork
+                // of the main relation. These are sharded and managed just like regular relation pages.
+                // See: https://github.com/neondatabase/neon/issues/9855
+                let is_local_vm_page = |heap_blk| {
+                    let vm_blk = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blk);
+                    shard.is_key_local(&rel_block_to_key(clear_vm_bits.vm_rel, vm_blk))
+                };
+                // Send the old and new VM page updates to their respective shards.
+                clear_vm_bits.old_heap_blkno = clear_vm_bits
+                    .old_heap_blkno
+                    .filter(|&blkno| is_local_vm_page(blkno));
+                clear_vm_bits.new_heap_blkno = clear_vm_bits
+                    .new_heap_blkno
+                    .filter(|&blkno| is_local_vm_page(blkno));
+                // If neither VM page belongs to this shard, discard the record.
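For context, the `is_local_vm_page` closure above routes a VM update by mapping the heap block number to the visibility-map block that covers it. A minimal sketch of that mapping, assuming PostgreSQL's defaults of 8 KB pages and two visibility-map bits per heap block (the constant name is illustrative; the real definition lives in `pg_constants`):

    // One VM page (8192 bytes minus the 24-byte page header, at 4 heap blocks
    // per byte) covers 32672 heap blocks.
    const HEAPBLOCKS_PER_PAGE: u32 = (8192 - 24) * 4;

    fn heapblk_to_mapblock(heap_blk: u32) -> u32 {
        heap_blk / HEAPBLOCKS_PER_PAGE
    }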
+ if clear_vm_bits.old_heap_blkno.is_none() && clear_vm_bits.new_heap_blkno.is_none() + { + metadata_record = None + } } + Some(MetadataRecord::LogicalMessage(LogicalMessageRecord::Put(_))) => { + // Filter LogicalMessage records (AUX files) to only be stored on shard zero + if !shard.is_shard_zero() { + metadata_record = None; + } + } + _ => {} } Ok(metadata_record) diff --git a/pageserver/benches/bench_ingest.rs b/pageserver/benches/bench_ingest.rs index caacd365b306..b67a9cc47951 100644 --- a/pageserver/benches/bench_ingest.rs +++ b/pageserver/benches/bench_ingest.rs @@ -62,10 +62,8 @@ async fn ingest( let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error); let gate = utils::sync::gate::Gate::default(); - let entered = gate.enter().unwrap(); - let layer = - InMemoryLayer::create(conf, timeline_id, tenant_shard_id, lsn, entered, &ctx).await?; + let layer = InMemoryLayer::create(conf, timeline_id, tenant_shard_id, lsn, &gate, &ctx).await?; let data = Value::Image(Bytes::from(vec![0u8; put_size])); let data_ser_size = data.serialized_size().unwrap() as usize; diff --git a/pageserver/client/src/mgmt_api.rs b/pageserver/client/src/mgmt_api.rs index 4d76c66905c4..c3a1ef8140cb 100644 --- a/pageserver/client/src/mgmt_api.rs +++ b/pageserver/client/src/mgmt_api.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::{collections::HashMap, error::Error as _}; use bytes::Bytes; use detach_ancestor::AncestorDetached; @@ -25,10 +25,10 @@ pub struct Client { #[derive(thiserror::Error, Debug)] pub enum Error { - #[error("send request: {0}")] + #[error("send request: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())] SendRequest(reqwest::Error), - #[error("receive body: {0}")] + #[error("receive body: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())] ReceiveBody(reqwest::Error), #[error("receive error body: {0}")] diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index a8c2c2e99278..567a69da3b12 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -53,6 +53,11 @@ project_build_tag!(BUILD_TAG); #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). +#[allow(non_upper_case_globals)] +#[export_name = "malloc_conf"] +pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; + const PID_FILE_NAME: &str = "pageserver.pid"; const FEATURES: &[&str] = &[ @@ -127,6 +132,7 @@ fn main() -> anyhow::Result<()> { info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine"); info!(?conf.virtual_file_io_mode, "starting with virtual_file IO mode"); info!(?conf.wal_receiver_protocol, "starting with WAL receiver protocol"); + info!(?conf.page_service_pipelining, "starting with page service pipelining config"); // The tenants directory contains all the pageserver local disk state. // Create if not exists and make sure all the contents are durable before proceeding. @@ -302,7 +308,7 @@ fn start_pageserver( pageserver::metrics::tokio_epoll_uring::Collector::new(), )) .unwrap(); - pageserver::preinitialize_metrics(); + pageserver::preinitialize_metrics(conf); // If any failpoints were set from FAILPOINTS environment variable, // print them to the log for debugging purposes @@ -630,45 +636,59 @@ fn start_pageserver( tokio::net::TcpListener::from_std(pageserver_listener).context("create tokio listener")? 
}); - let mut shutdown_pageserver = Some(shutdown_pageserver.drop_guard()); - // All started up! Now just sit and wait for shutdown signal. - - { - BACKGROUND_RUNTIME.block_on(async move { + BACKGROUND_RUNTIME.block_on(async move { + let signal_token = CancellationToken::new(); + let signal_cancel = signal_token.child_token(); + + // Spawn signal handlers. Runs in a loop since we want to be responsive to multiple signals + // even after triggering shutdown (e.g. a SIGQUIT after a slow SIGTERM shutdown). See: + // https://github.com/neondatabase/neon/issues/9740. + tokio::spawn(async move { let mut sigint = tokio::signal::unix::signal(SignalKind::interrupt()).unwrap(); let mut sigterm = tokio::signal::unix::signal(SignalKind::terminate()).unwrap(); let mut sigquit = tokio::signal::unix::signal(SignalKind::quit()).unwrap(); - let signal = tokio::select! { - _ = sigquit.recv() => { - info!("Got signal SIGQUIT. Terminating in immediate shutdown mode",); - std::process::exit(111); + + loop { + let signal = tokio::select! { + _ = sigquit.recv() => { + info!("Got signal SIGQUIT. Terminating in immediate shutdown mode."); + std::process::exit(111); + } + _ = sigint.recv() => "SIGINT", + _ = sigterm.recv() => "SIGTERM", + }; + + if !signal_token.is_cancelled() { + info!("Got signal {signal}. Terminating gracefully in fast shutdown mode."); + signal_token.cancel(); + } else { + info!("Got signal {signal}. Already shutting down."); } - _ = sigint.recv() => { "SIGINT" }, - _ = sigterm.recv() => { "SIGTERM" }, - }; - - info!("Got signal {signal}. Terminating gracefully in fast shutdown mode",); - - // This cancels the `shutdown_pageserver` cancellation tree. - // Right now that tree doesn't reach very far, and `task_mgr` is used instead. - // The plan is to change that over time. - shutdown_pageserver.take(); - pageserver::shutdown_pageserver( - http_endpoint_listener, - page_service, - consumption_metrics_tasks, - disk_usage_eviction_task, - &tenant_manager, - background_purges, - deletion_queue.clone(), - secondary_controller_tasks, - 0, - ) - .await; - unreachable!() - }) - } + } + }); + + // Wait for cancellation signal and shut down the pageserver. + // + // This cancels the `shutdown_pageserver` cancellation tree. Right now that tree doesn't + // reach very far, and `task_mgr` is used instead. The plan is to change that over time. 
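The loop above is the heart of this change: the first SIGINT or SIGTERM cancels a token and begins graceful shutdown, later signals only log, and SIGQUIT always force-exits. A condensed sketch of the same pattern, assuming only `tokio` and `tokio_util`, with `ctrl_c` standing in for the full signal set:

    use tokio_util::sync::CancellationToken;

    async fn run_until_shutdown(shutdown: impl std::future::Future<Output = ()>) {
        let token = CancellationToken::new();
        let wait = token.child_token();
        tokio::spawn(async move {
            loop {
                tokio::signal::ctrl_c().await.expect("failed to install signal handler");
                if !token.is_cancelled() {
                    token.cancel(); // first signal: begin graceful shutdown
                } else {
                    eprintln!("already shutting down"); // later signals: stay responsive, just log
                }
            }
        });
        wait.cancelled().await;
        shutdown.await; // run the shutdown sequence exactly once
    }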
+ signal_cancel.cancelled().await; + + shutdown_pageserver.cancel(); + pageserver::shutdown_pageserver( + http_endpoint_listener, + page_service, + consumption_metrics_tasks, + disk_usage_eviction_task, + &tenant_manager, + background_purges, + deletion_queue.clone(), + secondary_controller_tasks, + 0, + ) + .await; + unreachable!(); + }) } async fn create_remote_storage_client( diff --git a/pageserver/src/consumption_metrics/upload.rs b/pageserver/src/consumption_metrics/upload.rs index 1cb4e917c081..448bf4752581 100644 --- a/pageserver/src/consumption_metrics/upload.rs +++ b/pageserver/src/consumption_metrics/upload.rs @@ -1,3 +1,4 @@ +use std::error::Error as _; use std::time::SystemTime; use chrono::{DateTime, Utc}; @@ -350,7 +351,11 @@ impl std::fmt::Display for UploadError { match self { Rejected(code) => write!(f, "server rejected the metrics with {code}"), - Reqwest(e) => write!(f, "request failed: {e}"), + Reqwest(e) => write!( + f, + "request failed: {e}{}", + e.source().map(|e| format!(": {e}")).unwrap_or_default() + ), Cancelled => write!(f, "cancelled"), } } diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs index 7afcf52cf29e..8f2177fe5b22 100644 --- a/pageserver/src/context.rs +++ b/pageserver/src/context.rs @@ -91,8 +91,6 @@ use crate::task_mgr::TaskKind; -pub(crate) mod optional_counter; - // The main structure of this module, see module-level comment. #[derive(Debug)] pub struct RequestContext { @@ -100,7 +98,6 @@ pub struct RequestContext { download_behavior: DownloadBehavior, access_stats_behavior: AccessStatsBehavior, page_content_kind: PageContentKind, - pub micros_spent_throttled: optional_counter::MicroSecondsCounterU32, } /// The kind of access to the page cache. @@ -158,7 +155,6 @@ impl RequestContextBuilder { download_behavior: DownloadBehavior::Download, access_stats_behavior: AccessStatsBehavior::Update, page_content_kind: PageContentKind::Unknown, - micros_spent_throttled: Default::default(), }, } } @@ -172,7 +168,6 @@ impl RequestContextBuilder { download_behavior: original.download_behavior, access_stats_behavior: original.access_stats_behavior, page_content_kind: original.page_content_kind, - micros_spent_throttled: Default::default(), }, } } diff --git a/pageserver/src/context/optional_counter.rs b/pageserver/src/context/optional_counter.rs deleted file mode 100644 index 100c649f18cb..000000000000 --- a/pageserver/src/context/optional_counter.rs +++ /dev/null @@ -1,101 +0,0 @@ -use std::{ - sync::atomic::{AtomicU32, Ordering}, - time::Duration, -}; - -#[derive(Debug)] -pub struct CounterU32 { - inner: AtomicU32, -} -impl Default for CounterU32 { - fn default() -> Self { - Self { - inner: AtomicU32::new(u32::MAX), - } - } -} -impl CounterU32 { - pub fn open(&self) -> Result<(), &'static str> { - match self - .inner - .compare_exchange(u32::MAX, 0, Ordering::Relaxed, Ordering::Relaxed) - { - Ok(_) => Ok(()), - Err(_) => Err("open() called on clsoed state"), - } - } - pub fn close(&self) -> Result { - match self.inner.swap(u32::MAX, Ordering::Relaxed) { - u32::MAX => Err("close() called on closed state"), - x => Ok(x), - } - } - - pub fn add(&self, count: u32) -> Result<(), &'static str> { - if count == 0 { - return Ok(()); - } - let mut had_err = None; - self.inner - .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |cur| match cur { - u32::MAX => { - had_err = Some("add() called on closed state"); - None - } - x => { - let (new, overflowed) = x.overflowing_add(count); - if new == u32::MAX || overflowed { - had_err = Some("add() overflowed 
the counter"); - None - } else { - Some(new) - } - } - }) - .map_err(|_| had_err.expect("we set it whenever the function returns None")) - .map(|_| ()) - } -} - -#[derive(Default, Debug)] -pub struct MicroSecondsCounterU32 { - inner: CounterU32, -} - -impl MicroSecondsCounterU32 { - pub fn open(&self) -> Result<(), &'static str> { - self.inner.open() - } - pub fn add(&self, duration: Duration) -> Result<(), &'static str> { - match duration.as_micros().try_into() { - Ok(x) => self.inner.add(x), - Err(_) => Err("add(): duration conversion error"), - } - } - pub fn close_and_checked_sub_from(&self, from: Duration) -> Result { - let val = self.inner.close()?; - let val = Duration::from_micros(val as u64); - let subbed = match from.checked_sub(val) { - Some(v) => v, - None => return Err("Duration::checked_sub"), - }; - Ok(subbed) - } -} - -#[cfg(test)] -mod tests { - - use super::*; - - #[test] - fn test_basic() { - let counter = MicroSecondsCounterU32::default(); - counter.open().unwrap(); - counter.add(Duration::from_micros(23)).unwrap(); - let res = counter - .close_and_checked_sub_from(Duration::from_micros(42)) - .unwrap(); - assert_eq!(res, Duration::from_micros(42 - 23)); - } -} diff --git a/pageserver/src/controller_upcall_client.rs b/pageserver/src/controller_upcall_client.rs index 73fc6dc3ab1f..d41bfd9021c5 100644 --- a/pageserver/src/controller_upcall_client.rs +++ b/pageserver/src/controller_upcall_client.rs @@ -115,6 +115,10 @@ impl ControllerUpcallClient { Ok(res) } + + pub(crate) fn base_url(&self) -> &Url { + &self.base_url + } } impl ControlPlaneGenerationsApi for ControllerUpcallClient { @@ -191,13 +195,15 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient { let request = ReAttachRequest { node_id: self.node_id, - register, + register: register.clone(), }; let response: ReAttachResponse = self.retry_http_forever(&re_attach_path, request).await?; tracing::info!( - "Received re-attach response with {} tenants", - response.tenants.len() + "Received re-attach response with {} tenants (node {}, register: {:?})", + response.tenants.len(), + self.node_id, + register, ); failpoint_support::sleep_millis_async!("control-plane-client-re-attach"); diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index ceb1c3b012f5..0f11bbc50790 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -56,9 +56,9 @@ use tokio_util::sync::CancellationToken; use tracing::*; use utils::auth::JwtAuth; use utils::failpoint_support::failpoints_handler; -use utils::http::endpoint::profile_cpu_handler; -use utils::http::endpoint::prometheus_metrics_handler; -use utils::http::endpoint::request_span; +use utils::http::endpoint::{ + profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span, +}; use utils::http::request::must_parse_query_param; use utils::http::request::{get_request_param, must_get_query_param, parse_query_param}; @@ -87,7 +87,7 @@ use crate::tenant::timeline::offload::offload_timeline; use crate::tenant::timeline::offload::OffloadError; use crate::tenant::timeline::CompactFlags; use crate::tenant::timeline::CompactOptions; -use crate::tenant::timeline::CompactRange; +use crate::tenant::timeline::CompactRequest; use crate::tenant::timeline::CompactionError; use crate::tenant::timeline::Timeline; use crate::tenant::GetTimelineError; @@ -155,6 +155,7 @@ impl State { "/swagger.yml", "/metrics", "/profile/cpu", + "/profile/heap", ]; Ok(Self { conf, @@ -278,7 +279,10 @@ impl From for ApiError { impl From for ApiError { 
fn from(tse: GetTenantError) -> ApiError { match tse { - GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid).into()), + GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {tid}").into()), + GetTenantError::ShardNotFound(tid) => { + ApiError::NotFound(anyhow!("tenant {tid}").into()) + } GetTenantError::NotActive(_) => { // Why is this not `ApiError::NotFound`? // Because we must be careful to never return 404 for a tenant if it does @@ -386,6 +390,16 @@ impl From for ApiError { } } +impl From for ApiError { + fn from(ste: crate::tenant::secondary::SecondaryTenantError) -> ApiError { + use crate::tenant::secondary::SecondaryTenantError; + match ste { + SecondaryTenantError::GetTenant(gte) => gte.into(), + SecondaryTenantError::ShuttingDown => ApiError::ShuttingDown, + } + } +} + // Helper function to construct a TimelineInfo struct for a timeline async fn build_timeline_info( timeline: &Arc, @@ -1046,9 +1060,11 @@ async fn timeline_delete_handler( match e { // GetTenantError has a built-in conversion to ApiError, but in this context we don't // want to treat missing tenants as 404, to avoid ambiguity with successful deletions. - GetTenantError::NotFound(_) => ApiError::PreconditionFailed( - "Requested tenant is missing".to_string().into_boxed_str(), - ), + GetTenantError::NotFound(_) | GetTenantError::ShardNotFound(_) => { + ApiError::PreconditionFailed( + "Requested tenant is missing".to_string().into_boxed_str(), + ) + } e => e.into(), } })?; @@ -1962,6 +1978,26 @@ async fn timeline_gc_handler( json_response(StatusCode::OK, gc_result) } +// Cancel scheduled compaction tasks +async fn timeline_cancel_compact_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { + let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; + let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; + check_permission(&request, Some(tenant_shard_id.tenant_id))?; + let state = get_state(&request); + async { + let tenant = state + .tenant_manager + .get_attached_tenant_shard(tenant_shard_id)?; + tenant.cancel_scheduled_compaction(timeline_id); + json_response(StatusCode::OK, ()) + } + .instrument(info_span!("timeline_cancel_compact", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id)) + .await +} + // Run compaction immediately on given timeline. async fn timeline_compact_handler( mut request: Request, @@ -1971,7 +2007,7 @@ async fn timeline_compact_handler( let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; - let compact_range = json_request_maybe::>(&mut request).await?; + let compact_request = json_request_maybe::>(&mut request).await?; let state = get_state(&request); @@ -1996,22 +2032,50 @@ async fn timeline_compact_handler( let wait_until_uploaded = parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false); + let wait_until_scheduled_compaction_done = + parse_query_param::<_, bool>(&request, "wait_until_scheduled_compaction_done")? 
+ .unwrap_or(false); + + let sub_compaction = compact_request + .as_ref() + .map(|r| r.sub_compaction) + .unwrap_or(false); let options = CompactOptions { - compact_range, + compact_range: compact_request + .as_ref() + .and_then(|r| r.compact_range.clone()), + compact_below_lsn: compact_request.as_ref().and_then(|r| r.compact_below_lsn), flags, + sub_compaction, }; + let scheduled = compact_request + .as_ref() + .map(|r| r.scheduled) + .unwrap_or(false); + async { let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?; - timeline - .compact_with_options(&cancel, options, &ctx) - .await - .map_err(|e| ApiError::InternalServerError(e.into()))?; - if wait_until_uploaded { - timeline.remote_client.wait_completion().await - // XXX map to correct ApiError for the cases where it's due to shutdown - .context("wait completion").map_err(ApiError::InternalServerError)?; + if scheduled { + let tenant = state + .tenant_manager + .get_attached_tenant_shard(tenant_shard_id)?; + let rx = tenant.schedule_compaction(timeline_id, options).await; + if wait_until_scheduled_compaction_done { + // It is possible that this will take a long time, dropping the HTTP request will not cancel the compaction. + rx.await.ok(); + } + } else { + timeline + .compact_with_options(&cancel, options, &ctx) + .await + .map_err(|e| ApiError::InternalServerError(e.into()))?; + if wait_until_uploaded { + timeline.remote_client.wait_completion().await + // XXX map to correct ApiError for the cases where it's due to shutdown + .context("wait completion").map_err(ApiError::InternalServerError)?; + } } json_response(StatusCode::OK, ()) } @@ -2092,16 +2156,20 @@ async fn timeline_checkpoint_handler( // By default, checkpoints come with a compaction, but this may be optionally disabled by tests that just want to flush + upload. let compact = parse_query_param::<_, bool>(&request, "compact")?.unwrap_or(true); + let wait_until_flushed: bool = + parse_query_param(&request, "wait_until_flushed")?.unwrap_or(true); + let wait_until_uploaded = parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false); async { let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?; - timeline - .freeze_and_flush() - .await - .map_err(|e| { + if wait_until_flushed { + timeline.freeze_and_flush().await + } else { + timeline.freeze().await.and(Ok(())) + }.map_err(|e| { match e { tenant::timeline::FlushLayerError::Cancelled => ApiError::ShuttingDown, other => ApiError::InternalServerError(other.into()), @@ -2461,8 +2529,7 @@ async fn secondary_upload_handler( state .secondary_controller .upload_tenant(tenant_shard_id) - .await - .map_err(ApiError::InternalServerError)?; + .await?; json_response(StatusCode::OK, ()) } @@ -2577,7 +2644,7 @@ async fn secondary_download_handler( // Edge case: downloads aren't usually fallible: things like a missing heatmap are considered // okay. We could get an error here in the unlikely edge case that the tenant // was detached between our check above and executing the download job. - Ok(Err(e)) => return Err(ApiError::InternalServerError(e)), + Ok(Err(e)) => return Err(e.into()), // A timeout is not an error: we have started the download, we're just not done // yet. The caller will get a response body indicating status. 
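Because a timed-out download surfaces as 202 Accepted (the arm just below) rather than an error, callers are expected to poll until the status changes. A hedged client-side sketch of that loop; the HTTP method, URL, and poll interval here are placeholders, not the documented API:

    async fn poll_until_downloaded(client: &reqwest::Client, url: &str) -> reqwest::Result<()> {
        loop {
            let status = client.post(url).send().await?.error_for_status()?.status();
            if status != reqwest::StatusCode::ACCEPTED {
                return Ok(()); // download complete (or nothing to do)
            }
            tokio::time::sleep(std::time::Duration::from_secs(1)).await;
        }
    }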
Err(_) => StatusCode::ACCEPTED, @@ -3203,6 +3270,7 @@ pub fn make_router( .data(state) .get("/metrics", |r| request_span(r, prometheus_metrics_handler)) .get("/profile/cpu", |r| request_span(r, profile_cpu_handler)) + .get("/profile/heap", |r| request_span(r, profile_heap_handler)) .get("/v1/status", |r| api_handler(r, status_handler)) .put("/v1/failpoints", |r| { testing_api_handler("manage failpoints", r, failpoints_handler) @@ -3285,6 +3353,10 @@ pub fn make_router( "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact", |r| api_handler(r, timeline_compact_handler), ) + .delete( + "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact", + |r| api_handler(r, timeline_cancel_compact_handler), + ) .put( "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/offload", |r| testing_api_handler("attempt timeline offload", r, timeline_offload_handler), diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs index 06c4553e1c5b..c061714010a2 100644 --- a/pageserver/src/import_datadir.rs +++ b/pageserver/src/import_datadir.rs @@ -575,18 +575,24 @@ async fn import_file( } else if file_path.starts_with("pg_xact") { let slru = SlruKind::Clog; - import_slru(modification, slru, file_path, reader, len, ctx).await?; - debug!("imported clog slru"); + if modification.tline.tenant_shard_id.is_shard_zero() { + import_slru(modification, slru, file_path, reader, len, ctx).await?; + debug!("imported clog slru"); + } } else if file_path.starts_with("pg_multixact/offsets") { let slru = SlruKind::MultiXactOffsets; - import_slru(modification, slru, file_path, reader, len, ctx).await?; - debug!("imported multixact offsets slru"); + if modification.tline.tenant_shard_id.is_shard_zero() { + import_slru(modification, slru, file_path, reader, len, ctx).await?; + debug!("imported multixact offsets slru"); + } } else if file_path.starts_with("pg_multixact/members") { let slru = SlruKind::MultiXactMembers; - import_slru(modification, slru, file_path, reader, len, ctx).await?; - debug!("imported multixact members slru"); + if modification.tline.tenant_shard_id.is_shard_zero() { + import_slru(modification, slru, file_path, reader, len, ctx).await?; + debug!("imported multixact members slru"); + } } else if file_path.starts_with("pg_twophase") { let bytes = read_all_bytes(reader).await?; diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 86be97587fef..96ee1578563b 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -7,6 +7,10 @@ use metrics::{ IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec, }; use once_cell::sync::Lazy; +use pageserver_api::config::{ + PageServicePipeliningConfig, PageServicePipeliningConfigPipelined, + PageServiceProtocolPipelinedExecutionStrategy, +}; use pageserver_api::shard::TenantShardId; use postgres_backend::{is_expected_io_error, QueryError}; use pq_proto::framed::ConnectionError; @@ -213,31 +217,16 @@ impl<'a> ScanLatencyOngoingRecording<'a> { ScanLatencyOngoingRecording { parent, start } } - pub(crate) fn observe(self, throttled: Option) { + pub(crate) fn observe(self) { let elapsed = self.start.elapsed(); - let ex_throttled = if let Some(throttled) = throttled { - elapsed.checked_sub(throttled) - } else { - Some(elapsed) - }; - if let Some(ex_throttled) = ex_throttled { - self.parent.observe(ex_throttled.as_secs_f64()); - } else { - use utils::rate_limit::RateLimit; - static LOGGED: Lazy> = - Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10)))); - let mut rate_limit = 
LOGGED.lock().unwrap(); - rate_limit.call(|| { - warn!("error deducting time spent throttled; this message is logged at a global rate limit"); - }); - } + self.parent.observe(elapsed.as_secs_f64()); } } pub(crate) static GET_VECTORED_LATENCY: Lazy = Lazy::new(|| { let inner = register_histogram_vec!( "pageserver_get_vectored_seconds", - "Time spent in get_vectored, excluding time spent in timeline_get_throttle.", + "Time spent in get_vectored.", &["task_kind"], CRITICAL_OP_BUCKETS.into(), ) @@ -260,7 +249,7 @@ pub(crate) static GET_VECTORED_LATENCY: Lazy = Lazy::new(|| pub(crate) static SCAN_LATENCY: Lazy = Lazy::new(|| { let inner = register_histogram_vec!( "pageserver_scan_seconds", - "Time spent in scan, excluding time spent in timeline_get_throttle.", + "Time spent in scan.", &["task_kind"], CRITICAL_OP_BUCKETS.into(), ) @@ -475,6 +464,24 @@ static LAST_RECORD_LSN: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static DISK_CONSISTENT_LSN: Lazy = Lazy::new(|| { + register_int_gauge_vec!( + "pageserver_disk_consistent_lsn", + "Disk consistent LSN grouped by timeline", + &["tenant_id", "shard_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + +pub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy = Lazy::new(|| { + register_uint_gauge_vec!( + "pageserver_projected_remote_consistent_lsn", + "Projected remote consistent LSN grouped by timeline", + &["tenant_id", "shard_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + static PITR_HISTORY_SIZE: Lazy = Lazy::new(|| { register_uint_gauge_vec!( "pageserver_pitr_history_size", @@ -1216,28 +1223,62 @@ pub(crate) mod virtual_file_io_engine { }); } -struct GlobalAndPerTimelineHistogramTimer<'a, 'c> { - global_latency_histo: &'a Histogram, +pub(crate) struct SmgrOpTimer(Option); +pub(crate) struct SmgrOpTimerInner { + global_latency_histo: Histogram, // Optional because not all op types are tracked per-timeline - per_timeline_latency_histo: Option<&'a Histogram>, + per_timeline_latency_histo: Option, - ctx: &'c RequestContext, - start: std::time::Instant, + global_flush_in_progress_micros: IntCounter, + per_timeline_flush_in_progress_micros: IntCounter, + + start: Instant, + throttled: Duration, op: SmgrQueryType, - count: usize, } -impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> { - fn drop(&mut self) { - let elapsed = self.start.elapsed(); - let ex_throttled = self - .ctx - .micros_spent_throttled - .close_and_checked_sub_from(elapsed); - let ex_throttled = match ex_throttled { - Ok(res) => res, - Err(error) => { +pub(crate) struct SmgrOpFlushInProgress { + base: Instant, + global_micros: IntCounter, + per_timeline_micros: IntCounter, +} + +impl SmgrOpTimer { + pub(crate) fn deduct_throttle(&mut self, throttle: &Option) { + let Some(throttle) = throttle else { + return; + }; + let inner = self.0.as_mut().expect("other public methods consume self"); + inner.throttled += *throttle; + } + + pub(crate) fn observe_smgr_op_completion_and_start_flushing(mut self) -> SmgrOpFlushInProgress { + let (flush_start, inner) = self + .smgr_op_end() + .expect("this method consume self, and the only other caller is drop handler"); + let SmgrOpTimerInner { + global_flush_in_progress_micros, + per_timeline_flush_in_progress_micros, + .. + } = inner; + SmgrOpFlushInProgress { + base: flush_start, + global_micros: global_flush_in_progress_micros, + per_timeline_micros: per_timeline_flush_in_progress_micros, + } + } + + /// Returns `None`` if this method has already been called, `Some` otherwise. 
+ fn smgr_op_end(&mut self) -> Option<(Instant, SmgrOpTimerInner)> { + let inner = self.0.take()?; + + let now = Instant::now(); + let elapsed = now - inner.start; + + let elapsed = match elapsed.checked_sub(inner.throttled) { + Some(elapsed) => elapsed, + None => { use utils::rate_limit::RateLimit; static LOGGED: Lazy>> = Lazy::new(|| { @@ -1246,19 +1287,62 @@ impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> { }))) }); let mut guard = LOGGED.lock().unwrap(); - let rate_limit = &mut guard[self.op]; + let rate_limit = &mut guard[inner.op]; rate_limit.call(|| { - warn!(op=?self.op, error, "error deducting time spent throttled; this message is logged at a global rate limit"); + warn!(op=?inner.op, ?elapsed, ?inner.throttled, "implementation error: time spent throttled exceeds total request wall clock time"); }); - elapsed + elapsed // un-throttled time, more info than just saturating to 0 } }; - for _ in 0..self.count { - self.global_latency_histo - .observe(ex_throttled.as_secs_f64()); - if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo { - per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64()); + let elapsed = elapsed.as_secs_f64(); + + inner.global_latency_histo.observe(elapsed); + if let Some(per_timeline_getpage_histo) = &inner.per_timeline_latency_histo { + per_timeline_getpage_histo.observe(elapsed); + } + + Some((now, inner)) + } +} + +impl Drop for SmgrOpTimer { + fn drop(&mut self) { + self.smgr_op_end(); + } +} + +impl SmgrOpFlushInProgress { + pub(crate) async fn measure(mut self, mut fut: Fut) -> O + where + Fut: std::future::Future, + { + let mut fut = std::pin::pin!(fut); + + let now = Instant::now(); + // Whenever observe_guard gets called, or dropped, + // it adds the time elapsed since its last call to metrics. + // Last call is tracked in `now`. + let mut observe_guard = scopeguard::guard( + || { + let elapsed = now - self.base; + self.global_micros + .inc_by(u64::try_from(elapsed.as_micros()).unwrap()); + self.per_timeline_micros + .inc_by(u64::try_from(elapsed.as_micros()).unwrap()); + self.base = now; + }, + |mut observe| { + observe(); + }, + ); + + loop { + match tokio::time::timeout(Duration::from_secs(10), &mut fut).await { + Ok(v) => return v, + Err(_timeout) => { + (*observe_guard)(); + } } } } @@ -1289,6 +1373,10 @@ pub(crate) struct SmgrQueryTimePerTimeline { global_latency: [Histogram; SmgrQueryType::COUNT], per_timeline_getpage_started: IntCounter, per_timeline_getpage_latency: Histogram, + global_batch_size: Histogram, + per_timeline_batch_size: Histogram, + global_flush_in_progress_micros: IntCounter, + per_timeline_flush_in_progress_micros: IntCounter, } static SMGR_QUERY_STARTED_GLOBAL: Lazy = Lazy::new(|| { @@ -1381,6 +1469,96 @@ static SMGR_QUERY_TIME_GLOBAL: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL: Lazy> = Lazy::new(|| { + (1..=u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap()) + .map(|v| v.into()) + .collect() +}); + +static PAGE_SERVICE_BATCH_SIZE_GLOBAL: Lazy = Lazy::new(|| { + register_histogram!( + "pageserver_page_service_batch_size_global", + "Batch size of pageserver page service requests", + PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL.clone(), + ) + .expect("failed to define a metric") +}); + +static PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE: Lazy> = Lazy::new(|| { + let mut buckets = Vec::new(); + for i in 0.. 
{ + let bucket = 1 << i; + if bucket > u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap() { + break; + } + buckets.push(bucket.into()); + } + buckets +}); + +static PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE: Lazy = Lazy::new(|| { + register_histogram_vec!( + "pageserver_page_service_batch_size", + "Batch size of pageserver page service requests", + &["tenant_id", "shard_id", "timeline_id"], + PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE.clone() + ) + .expect("failed to define a metric") +}); + +pub(crate) static PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE: Lazy = Lazy::new(|| { + register_int_gauge_vec!( + "pageserver_page_service_config_max_batch_size", + "Configured maximum batch size for the server-side batching functionality of page_service. \ + Labels expose more of the configuration parameters.", + &["mode", "execution"] + ) + .expect("failed to define a metric") +}); + +fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) { + PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE.reset(); + let (label_values, value) = match conf { + PageServicePipeliningConfig::Serial => (["serial", "-"], 1), + PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined { + max_batch_size, + execution, + }) => { + let mode = "pipelined"; + let execution = match execution { + PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures => { + "concurrent-futures" + } + PageServiceProtocolPipelinedExecutionStrategy::Tasks => "tasks", + }; + ([mode, execution], max_batch_size.get()) + } + }; + PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE + .with_label_values(&label_values) + .set(value.try_into().unwrap()); +} + +static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_page_service_pagestream_flush_in_progress_micros", + "Counter that sums up the microseconds that a pagestream response was being flushed into the TCP connection. \ + If the flush is particularly slow, this counter will be updated periodically to make slow flushes \ + easily discoverable in monitoring. 
\ Hence, this is NOT a completion latency histogram.",
+        &["tenant_id", "shard_id", "timeline_id"],
+    )
+    .expect("failed to define a metric")
+});
+
+static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "pageserver_page_service_pagestream_flush_in_progress_micros_global",
+        "Like pageserver_page_service_pagestream_flush_in_progress_micros, but instance-wide.",
+    )
+    .expect("failed to define a metric")
+});
+
 impl SmgrQueryTimePerTimeline {
     pub(crate) fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
         let tenant_id = tenant_shard_id.tenant_id.to_string();
@@ -1416,78 +1594,65 @@ impl SmgrQueryTimePerTimeline {
             ])
             .unwrap();
 
+        let global_batch_size = PAGE_SERVICE_BATCH_SIZE_GLOBAL.clone();
+        let per_timeline_batch_size = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE
+            .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
+            .unwrap();
+
+        let global_flush_in_progress_micros =
+            PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL.clone();
+        let per_timeline_flush_in_progress_micros = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS
+            .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
+            .unwrap();
+
         Self {
             global_started,
             global_latency,
             per_timeline_getpage_latency,
             per_timeline_getpage_started,
+            global_batch_size,
+            per_timeline_batch_size,
+            global_flush_in_progress_micros,
+            per_timeline_flush_in_progress_micros,
         }
     }
-    pub(crate) fn start_timer<'c: 'a, 'a>(
-        &'a self,
-        op: SmgrQueryType,
-        ctx: &'c RequestContext,
-    ) -> Option {
-        self.start_timer_many(op, 1, ctx)
-    }
-    pub(crate) fn start_timer_many<'c: 'a, 'a>(
-        &'a self,
-        op: SmgrQueryType,
-        count: usize,
-        ctx: &'c RequestContext,
-    ) -> Option {
-        let start = Instant::now();
-
+    pub(crate) fn start_smgr_op(&self, op: SmgrQueryType, started_at: Instant) -> SmgrOpTimer {
         self.global_started[op as usize].inc();
-        // We subtract time spent throttled from the observed latency.
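The block deleted below (like the old drop handler earlier in this file) pulled throttle time out of a counter smuggled through `RequestContext`; in the new scheme the deduction happens inside `SmgrOpTimer` itself, fed by `deduct_throttle`. A simplified sketch of that accounting, assuming a prometheus-style histogram rather than the actual `SmgrOpTimer` internals:

    struct OpTimer {
        start: std::time::Instant,
        throttled: std::time::Duration,
    }

    impl OpTimer {
        fn observe(self, histo: &prometheus::Histogram) {
            let elapsed = self.start.elapsed();
            // If throttle accounting went wrong, fall back to the un-deducted
            // elapsed time (the real code logs a rate-limited warning here).
            let ex_throttled = elapsed.checked_sub(self.throttled).unwrap_or(elapsed);
            histo.observe(ex_throttled.as_secs_f64());
        }
    }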
- match ctx.micros_spent_throttled.open() { - Ok(()) => (), - Err(error) => { - use utils::rate_limit::RateLimit; - static LOGGED: Lazy>> = - Lazy::new(|| { - Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| { - RateLimit::new(Duration::from_secs(10)) - }))) - }); - let mut guard = LOGGED.lock().unwrap(); - let rate_limit = &mut guard[op]; - rate_limit.call(|| { - warn!(?op, error, "error opening micros_spent_throttled; this message is logged at a global rate limit"); - }); - } - } - let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) { self.per_timeline_getpage_started.inc(); - Some(&self.per_timeline_getpage_latency) + Some(self.per_timeline_getpage_latency.clone()) } else { None }; - Some(GlobalAndPerTimelineHistogramTimer { - global_latency_histo: &self.global_latency[op as usize], + SmgrOpTimer(Some(SmgrOpTimerInner { + global_latency_histo: self.global_latency[op as usize].clone(), per_timeline_latency_histo, - ctx, - start, + start: started_at, op, - count, - }) + throttled: Duration::ZERO, + global_flush_in_progress_micros: self.global_flush_in_progress_micros.clone(), + per_timeline_flush_in_progress_micros: self + .per_timeline_flush_in_progress_micros + .clone(), + })) + } + + pub(crate) fn observe_getpage_batch_start(&self, batch_size: usize) { + self.global_batch_size.observe(batch_size as f64); + self.per_timeline_batch_size.observe(batch_size as f64); } } #[cfg(test)] mod smgr_query_time_tests { + use std::time::Instant; + use pageserver_api::shard::TenantShardId; use strum::IntoEnumIterator; use utils::id::{TenantId, TimelineId}; - use crate::{ - context::{DownloadBehavior, RequestContext}, - task_mgr::TaskKind, - }; - // Regression test, we used hard-coded string constants before using an enum. 
#[test] fn op_label_name() { @@ -1531,8 +1696,7 @@ mod smgr_query_time_tests { let (pre_global, pre_per_tenant_timeline) = get_counts(); assert_eq!(pre_per_tenant_timeline, 0); - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Download); - let timer = metrics.start_timer(*op, &ctx); + let timer = metrics.start_smgr_op(*op, Instant::now()); drop(timer); let (post_global, post_per_tenant_timeline) = get_counts(); @@ -1579,58 +1743,24 @@ pub(crate) static BASEBACKUP_QUERY_TIME: Lazy = Lazy::new(| } }); -pub(crate) struct BasebackupQueryTimeOngoingRecording<'a, 'c> { +pub(crate) struct BasebackupQueryTimeOngoingRecording<'a> { parent: &'a BasebackupQueryTime, - ctx: &'c RequestContext, start: std::time::Instant, } impl BasebackupQueryTime { - pub(crate) fn start_recording<'c: 'a, 'a>( - &'a self, - ctx: &'c RequestContext, - ) -> BasebackupQueryTimeOngoingRecording<'a, 'a> { + pub(crate) fn start_recording(&self) -> BasebackupQueryTimeOngoingRecording<'_> { let start = Instant::now(); - match ctx.micros_spent_throttled.open() { - Ok(()) => (), - Err(error) => { - use utils::rate_limit::RateLimit; - static LOGGED: Lazy> = - Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10)))); - let mut rate_limit = LOGGED.lock().unwrap(); - rate_limit.call(|| { - warn!(error, "error opening micros_spent_throttled; this message is logged at a global rate limit"); - }); - } - } BasebackupQueryTimeOngoingRecording { parent: self, - ctx, start, } } } -impl BasebackupQueryTimeOngoingRecording<'_, '_> { +impl BasebackupQueryTimeOngoingRecording<'_> { pub(crate) fn observe(self, res: &Result) { - let elapsed = self.start.elapsed(); - let ex_throttled = self - .ctx - .micros_spent_throttled - .close_and_checked_sub_from(elapsed); - let ex_throttled = match ex_throttled { - Ok(ex_throttled) => ex_throttled, - Err(error) => { - use utils::rate_limit::RateLimit; - static LOGGED: Lazy> = - Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10)))); - let mut rate_limit = LOGGED.lock().unwrap(); - rate_limit.call(|| { - warn!(error, "error deducting time spent throttled; this message is logged at a global rate limit"); - }); - elapsed - } - }; + let elapsed = self.start.elapsed().as_secs_f64(); // If you want to change categorize of a specific error, also change it in `log_query_error`. 
let metric = match res { Ok(_) => &self.parent.ok, @@ -1641,7 +1771,7 @@ impl BasebackupQueryTimeOngoingRecording<'_, '_> { } Err(_) => &self.parent.error, }; - metric.observe(ex_throttled.as_secs_f64()); + metric.observe(elapsed); } } @@ -2181,6 +2311,15 @@ pub(crate) static WAL_INGEST: Lazy = Lazy::new(|| WalIngestMet .expect("failed to define a metric"), }); +pub(crate) static PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_timeline_wal_records_received", + "Number of WAL records received per shard", + &["tenant_id", "shard_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + pub(crate) static WAL_REDO_TIME: Lazy = Lazy::new(|| { register_histogram!( "pageserver_wal_redo_seconds", @@ -2389,7 +2528,8 @@ pub(crate) struct TimelineMetrics { pub load_layer_map_histo: StorageTimeMetrics, pub garbage_collect_histo: StorageTimeMetrics, pub find_gc_cutoffs_histo: StorageTimeMetrics, - pub last_record_gauge: IntGauge, + pub last_record_lsn_gauge: IntGauge, + pub disk_consistent_lsn_gauge: IntGauge, pub pitr_history_size: UIntGauge, pub archival_size: UIntGauge, pub(crate) layer_size_image: UIntGauge, @@ -2407,6 +2547,7 @@ pub(crate) struct TimelineMetrics { pub evictions_with_low_residence_duration: std::sync::RwLock, /// Number of valid LSN leases. pub valid_lsn_lease_count_gauge: UIntGauge, + pub wal_records_received: IntCounter, shutdown: std::sync::atomic::AtomicBool, } @@ -2470,7 +2611,11 @@ impl TimelineMetrics { &shard_id, &timeline_id, ); - let last_record_gauge = LAST_RECORD_LSN + let last_record_lsn_gauge = LAST_RECORD_LSN + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); + + let disk_consistent_lsn_gauge = DISK_CONSISTENT_LSN .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); @@ -2560,6 +2705,10 @@ impl TimelineMetrics { .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); + let wal_records_received = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); + TimelineMetrics { tenant_id, shard_id, @@ -2573,7 +2722,8 @@ impl TimelineMetrics { garbage_collect_histo, find_gc_cutoffs_histo, load_layer_map_histo, - last_record_gauge, + last_record_lsn_gauge, + disk_consistent_lsn_gauge, pitr_history_size, archival_size, layer_size_image, @@ -2591,6 +2741,7 @@ impl TimelineMetrics { evictions_with_low_residence_duration, ), valid_lsn_lease_count_gauge, + wal_records_received, shutdown: std::sync::atomic::AtomicBool::default(), } } @@ -2637,6 +2788,7 @@ impl TimelineMetrics { let timeline_id = &self.timeline_id; let shard_id = &self.shard_id; let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); + let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]); { @@ -2722,6 +2874,21 @@ impl TimelineMetrics { shard_id, timeline_id, ]); + let _ = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE.remove_label_values(&[ + tenant_id, + shard_id, + timeline_id, + ]); + let _ = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED.remove_label_values(&[ + tenant_id, + shard_id, + timeline_id, + ]); + let _ = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS.remove_label_values(&[ + tenant_id, + shard_id, + timeline_id, + ]); } } @@ -2747,10 +2914,12 @@ use std::sync::{Arc, 
Mutex}; use std::task::{Context, Poll}; use std::time::{Duration, Instant}; +use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext}; use crate::task_mgr::TaskKind; use crate::tenant::mgr::TenantSlot; use crate::tenant::tasks::BackgroundLoopKind; +use crate::tenant::Timeline; /// Maintain a per timeline gauge in addition to the global gauge. pub(crate) struct PerTimelineRemotePhysicalSizeGauge { @@ -2793,6 +2962,7 @@ pub(crate) struct RemoteTimelineClientMetrics { calls: Mutex>, bytes_started_counter: Mutex>, bytes_finished_counter: Mutex>, + pub(crate) projected_remote_consistent_lsn_gauge: UIntGauge, } impl RemoteTimelineClientMetrics { @@ -2807,6 +2977,10 @@ impl RemoteTimelineClientMetrics { .unwrap(), ); + let projected_remote_consistent_lsn_gauge = PROJECTED_REMOTE_CONSISTENT_LSN + .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str]) + .unwrap(); + RemoteTimelineClientMetrics { tenant_id: tenant_id_str, shard_id: shard_id_str, @@ -2815,6 +2989,7 @@ impl RemoteTimelineClientMetrics { bytes_started_counter: Mutex::new(HashMap::default()), bytes_finished_counter: Mutex::new(HashMap::default()), remote_physical_size_gauge, + projected_remote_consistent_lsn_gauge, } } @@ -3028,6 +3203,7 @@ impl Drop for RemoteTimelineClientMetrics { calls, bytes_started_counter, bytes_finished_counter, + projected_remote_consistent_lsn_gauge, } = self; for ((a, b), _) in calls.get_mut().unwrap().drain() { let mut res = [Ok(()), Ok(())]; @@ -3057,6 +3233,14 @@ impl Drop for RemoteTimelineClientMetrics { let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); } + { + let _ = projected_remote_consistent_lsn_gauge; + let _ = PROJECTED_REMOTE_CONSISTENT_LSN.remove_label_values(&[ + tenant_id, + shard_id, + timeline_id, + ]); + } } } @@ -3307,7 +3491,7 @@ pub(crate) mod tenant_throttling { use once_cell::sync::Lazy; use utils::shard::TenantShardId; - use crate::tenant::{self, throttle::Metric}; + use crate::tenant::{self}; struct GlobalAndPerTenantIntCounter { global: IntCounter, @@ -3326,7 +3510,7 @@ pub(crate) mod tenant_throttling { } } - pub(crate) struct TimelineGet { + pub(crate) struct Metrics { count_accounted_start: GlobalAndPerTenantIntCounter, count_accounted_finish: GlobalAndPerTenantIntCounter, wait_time: GlobalAndPerTenantIntCounter, @@ -3399,40 +3583,41 @@ pub(crate) mod tenant_throttling { .unwrap() }); - const KIND: &str = "timeline_get"; + const KINDS: &[&str] = &["pagestream"]; + pub type Pagestream = Metrics<0>; - impl TimelineGet { + impl Metrics { pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self { let per_tenant_label_values = &[ - KIND, + KINDS[KIND], &tenant_shard_id.tenant_id.to_string(), &tenant_shard_id.shard_slug().to_string(), ]; - TimelineGet { + Metrics { count_accounted_start: { GlobalAndPerTenantIntCounter { - global: COUNT_ACCOUNTED_START.with_label_values(&[KIND]), + global: COUNT_ACCOUNTED_START.with_label_values(&[KINDS[KIND]]), per_tenant: COUNT_ACCOUNTED_START_PER_TENANT .with_label_values(per_tenant_label_values), } }, count_accounted_finish: { GlobalAndPerTenantIntCounter { - global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KIND]), + global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KINDS[KIND]]), per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT .with_label_values(per_tenant_label_values), } }, wait_time: { GlobalAndPerTenantIntCounter { - global: 
WAIT_USECS.with_label_values(&[KIND]), + global: WAIT_USECS.with_label_values(&[KINDS[KIND]]), per_tenant: WAIT_USECS_PER_TENANT .with_label_values(per_tenant_label_values), } }, count_throttled: { GlobalAndPerTenantIntCounter { - global: WAIT_COUNT.with_label_values(&[KIND]), + global: WAIT_COUNT.with_label_values(&[KINDS[KIND]]), per_tenant: WAIT_COUNT_PER_TENANT .with_label_values(per_tenant_label_values), } @@ -3455,15 +3640,17 @@ pub(crate) mod tenant_throttling { &WAIT_USECS_PER_TENANT, &WAIT_COUNT_PER_TENANT, ] { - let _ = m.remove_label_values(&[ - KIND, - &tenant_shard_id.tenant_id.to_string(), - &tenant_shard_id.shard_slug().to_string(), - ]); + for kind in KINDS { + let _ = m.remove_label_values(&[ + kind, + &tenant_shard_id.tenant_id.to_string(), + &tenant_shard_id.shard_slug().to_string(), + ]); + } } } - impl Metric for TimelineGet { + impl tenant::throttle::Metric for Metrics { #[inline(always)] fn accounting_start(&self) { self.count_accounted_start.inc(); @@ -3562,7 +3749,9 @@ pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) { .set(u64::try_from(num_threads.get()).unwrap()); } -pub fn preinitialize_metrics() { +pub fn preinitialize_metrics(conf: &'static PageServerConf) { + set_page_service_config_max_batch_size(&conf.page_service_pipelining); + // Python tests need these and on some we do alerting. // // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of @@ -3630,6 +3819,7 @@ pub fn preinitialize_metrics() { &WAL_REDO_RECORDS_HISTOGRAM, &WAL_REDO_BYTES_HISTOGRAM, &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, + &PAGE_SERVICE_BATCH_SIZE_GLOBAL, ] .into_iter() .for_each(|h| { diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 1917e7f5b7ea..97d94bbe7f33 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -51,7 +51,7 @@ use crate::auth::check_permission; use crate::basebackup::BasebackupError; use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; -use crate::metrics::{self}; +use crate::metrics::{self, SmgrOpTimer}; use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS}; use crate::pgdatadir_mapping::Version; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; @@ -540,11 +540,13 @@ impl From for QueryError { enum BatchedFeMessage { Exists { span: Span, + timer: SmgrOpTimer, shard: timeline::handle::Handle, req: models::PagestreamExistsRequest, }, Nblocks { span: Span, + timer: SmgrOpTimer, shard: timeline::handle::Handle, req: models::PagestreamNblocksRequest, }, @@ -552,15 +554,17 @@ enum BatchedFeMessage { span: Span, shard: timeline::handle::Handle, effective_request_lsn: Lsn, - pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>, + pages: smallvec::SmallVec<[(RelTag, BlockNumber, SmgrOpTimer); 1]>, }, DbSize { span: Span, + timer: SmgrOpTimer, shard: timeline::handle::Handle, req: models::PagestreamDbSizeRequest, }, GetSlruSegment { span: Span, + timer: SmgrOpTimer, shard: timeline::handle::Handle, req: models::PagestreamGetSlruSegmentRequest, }, @@ -570,6 +574,41 @@ enum BatchedFeMessage { }, } +impl BatchedFeMessage { + async fn throttle(&mut self, cancel: &CancellationToken) -> Result<(), QueryError> { + let (shard, tokens, timers) = match self { + BatchedFeMessage::Exists { shard, timer, .. } + | BatchedFeMessage::Nblocks { shard, timer, .. } + | BatchedFeMessage::DbSize { shard, timer, .. } + | BatchedFeMessage::GetSlruSegment { shard, timer, .. 
} => { + ( + shard, + // 1 token is probably under-estimating because these + // request handlers typically do several Timeline::get calls. + 1, + itertools::Either::Left(std::iter::once(timer)), + ) + } + BatchedFeMessage::GetPage { shard, pages, .. } => ( + shard, + pages.len(), + itertools::Either::Right(pages.iter_mut().map(|(_, _, timer)| timer)), + ), + BatchedFeMessage::RespondError { .. } => return Ok(()), + }; + let throttled = tokio::select! { + throttled = shard.pagestream_throttle.throttle(tokens) => { throttled } + _ = cancel.cancelled() => { + return Err(QueryError::Shutdown); + } + }; + for timer in timers { + timer.deduct_throttle(&throttled); + } + Ok(()) + } +} + impl PageServerHandler { pub fn new( tenant_manager: Arc, @@ -632,6 +671,8 @@ impl PageServerHandler { msg = pgb.read_message() => { msg } }; + let received_at = Instant::now(); + let copy_data_bytes = match msg? { Some(FeMessage::CopyData(bytes)) => bytes, Some(FeMessage::Terminate) => { @@ -660,7 +701,15 @@ impl PageServerHandler { .get(tenant_id, timeline_id, ShardSelector::Zero) .instrument(span.clone()) // sets `shard_id` field .await?; - BatchedFeMessage::Exists { span, shard, req } + let timer = shard + .query_metrics + .start_smgr_op(metrics::SmgrQueryType::GetRelExists, received_at); + BatchedFeMessage::Exists { + span, + timer, + shard, + req, + } } PagestreamFeMessage::Nblocks(req) => { let span = tracing::info_span!(parent: parent_span, "handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn); @@ -668,7 +717,15 @@ impl PageServerHandler { .get(tenant_id, timeline_id, ShardSelector::Zero) .instrument(span.clone()) // sets `shard_id` field .await?; - BatchedFeMessage::Nblocks { span, shard, req } + let timer = shard + .query_metrics + .start_smgr_op(metrics::SmgrQueryType::GetRelSize, received_at); + BatchedFeMessage::Nblocks { + span, + timer, + shard, + req, + } } PagestreamFeMessage::DbSize(req) => { let span = tracing::info_span!(parent: parent_span, "handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn); @@ -676,7 +733,15 @@ impl PageServerHandler { .get(tenant_id, timeline_id, ShardSelector::Zero) .instrument(span.clone()) // sets `shard_id` field .await?; - BatchedFeMessage::DbSize { span, shard, req } + let timer = shard + .query_metrics + .start_smgr_op(metrics::SmgrQueryType::GetDbSize, received_at); + BatchedFeMessage::DbSize { + span, + timer, + shard, + req, + } } PagestreamFeMessage::GetSlruSegment(req) => { let span = tracing::info_span!(parent: parent_span, "handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn); @@ -684,7 +749,15 @@ impl PageServerHandler { .get(tenant_id, timeline_id, ShardSelector::Zero) .instrument(span.clone()) // sets `shard_id` field .await?; - BatchedFeMessage::GetSlruSegment { span, shard, req } + let timer = shard + .query_metrics + .start_smgr_op(metrics::SmgrQueryType::GetSlruSegment, received_at); + BatchedFeMessage::GetSlruSegment { + span, + timer, + shard, + req, + } } PagestreamFeMessage::GetPage(PagestreamGetPageRequest { request_lsn, @@ -728,6 +801,14 @@ impl PageServerHandler { return respond_error!(e.into()); } }; + + // It's important to start the timer before waiting for the LSN + // so that the _started counters are incremented before we do + // any serious waiting, e.g., for LSNs. 
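                    // A condensed sketch of that ordering (illustrative only, not part
                    // of this patch; `started`/`finished` stand in for the real per-op
                    // counters):
                    //
                    //     started.inc();                   // at receive time, before any waiting
                    //     wait_or_get_last_lsn(..).await;  // potentially long; now visible as
                    //                                      // started > finished while we wait
                    //     let page = do_read(..).await;
                    //     finished.inc();                  // observed latency includes the wait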
+ let timer = shard + .query_metrics + .start_smgr_op(metrics::SmgrQueryType::GetPageAtLsn, received_at); + let effective_request_lsn = match Self::wait_or_get_last_lsn( &shard, request_lsn, @@ -747,7 +828,7 @@ impl PageServerHandler { span, shard, effective_request_lsn, - pages: smallvec::smallvec![(rel, blkno)], + pages: smallvec::smallvec![(rel, blkno, timer)], } } }; @@ -832,90 +913,112 @@ impl PageServerHandler { IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, { // invoke handler function - let (handler_results, span): (Vec>, _) = - match batch { - BatchedFeMessage::Exists { span, shard, req } => { - fail::fail_point!("ps::handle-pagerequest-message::exists"); - ( - vec![ - self.handle_get_rel_exists_request(&shard, &req, ctx) - .instrument(span.clone()) - .await, - ], - span, - ) - } - BatchedFeMessage::Nblocks { span, shard, req } => { - fail::fail_point!("ps::handle-pagerequest-message::nblocks"); - ( - vec![ - self.handle_get_nblocks_request(&shard, &req, ctx) - .instrument(span.clone()) - .await, - ], - span, - ) - } - BatchedFeMessage::GetPage { + let (handler_results, span): ( + Vec>, + _, + ) = match batch { + BatchedFeMessage::Exists { + span, + timer, + shard, + req, + } => { + fail::fail_point!("ps::handle-pagerequest-message::exists"); + ( + vec![self + .handle_get_rel_exists_request(&shard, &req, ctx) + .instrument(span.clone()) + .await + .map(|msg| (msg, timer))], span, - shard, - effective_request_lsn, - pages, - } => { - fail::fail_point!("ps::handle-pagerequest-message::getpage"); - ( - { - let npages = pages.len(); - trace!(npages, "handling getpage request"); - let res = self - .handle_get_page_at_lsn_request_batched( - &shard, - effective_request_lsn, - pages, - ctx, - ) - .instrument(span.clone()) - .await; - assert_eq!(res.len(), npages); - res - }, - span, - ) - } - BatchedFeMessage::DbSize { span, shard, req } => { - fail::fail_point!("ps::handle-pagerequest-message::dbsize"); - ( - vec![ - self.handle_db_size_request(&shard, &req, ctx) - .instrument(span.clone()) - .await, - ], - span, - ) - } - BatchedFeMessage::GetSlruSegment { span, shard, req } => { - fail::fail_point!("ps::handle-pagerequest-message::slrusegment"); - ( - vec![ - self.handle_get_slru_segment_request(&shard, &req, ctx) - .instrument(span.clone()) - .await, - ], - span, - ) - } - BatchedFeMessage::RespondError { span, error } => { - // We've already decided to respond with an error, so we don't need to - // call the handler. 
- (vec![Err(error)], span) - } - }; + ) + } + BatchedFeMessage::Nblocks { + span, + timer, + shard, + req, + } => { + fail::fail_point!("ps::handle-pagerequest-message::nblocks"); + ( + vec![self + .handle_get_nblocks_request(&shard, &req, ctx) + .instrument(span.clone()) + .await + .map(|msg| (msg, timer))], + span, + ) + } + BatchedFeMessage::GetPage { + span, + shard, + effective_request_lsn, + pages, + } => { + fail::fail_point!("ps::handle-pagerequest-message::getpage"); + ( + { + let npages = pages.len(); + trace!(npages, "handling getpage request"); + let res = self + .handle_get_page_at_lsn_request_batched( + &shard, + effective_request_lsn, + pages, + ctx, + ) + .instrument(span.clone()) + .await; + assert_eq!(res.len(), npages); + res + }, + span, + ) + } + BatchedFeMessage::DbSize { + span, + timer, + shard, + req, + } => { + fail::fail_point!("ps::handle-pagerequest-message::dbsize"); + ( + vec![self + .handle_db_size_request(&shard, &req, ctx) + .instrument(span.clone()) + .await + .map(|msg| (msg, timer))], + span, + ) + } + BatchedFeMessage::GetSlruSegment { + span, + timer, + shard, + req, + } => { + fail::fail_point!("ps::handle-pagerequest-message::slrusegment"); + ( + vec![self + .handle_get_slru_segment_request(&shard, &req, ctx) + .instrument(span.clone()) + .await + .map(|msg| (msg, timer))], + span, + ) + } + BatchedFeMessage::RespondError { span, error } => { + // We've already decided to respond with an error, so we don't need to + // call the handler. + (vec![Err(error)], span) + } + }; // Map handler result to protocol behavior. // Some handler errors cause exit from pagestream protocol. // Other handler errors are sent back as an error message and we stay in pagestream protocol. for handler_result in handler_results { - let response_msg = match handler_result { + let (response_msg, timer) = match handler_result { Err(e) => match &e { PageStreamError::Shutdown => { // If we fail to fulfil a request during shutdown, which may be _because_ of @@ -939,27 +1042,65 @@ impl PageServerHandler { span.in_scope(|| { error!("error reading relation or page version: {full:#}") }); - PagestreamBeMessage::Error(PagestreamErrorResponse { - message: e.to_string(), - }) + ( + PagestreamBeMessage::Error(PagestreamErrorResponse { + message: e.to_string(), + }), + None, // TODO: measure errors + ) } }, - Ok(response_msg) => response_msg, + Ok((response_msg, timer)) => (response_msg, Some(timer)), }; + // // marshal & transmit response message + // + pgb_writer.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?; - } - tokio::select! { - biased; - _ = cancel.cancelled() => { - // We were requested to shut down. - info!("shutdown request received in page handler"); - return Err(QueryError::Shutdown) - } - res = pgb_writer.flush() => { - res?; + + // We purposefully don't count flush time into the timer. + // + // The reason is that current compute client will not perform protocol processing + // if the postgres backend process is doing things other than `->smgr_read()`. + // This is especially the case for prefetch. + // + // If the compute doesn't read from the connection, eventually TCP will backpressure + // all the way into our flush call below. + // + // The timer's underlying metric is used for a storage-internal latency SLO and + // we don't want to include latency in it that we can't control. + // And as pointed out above, in this case, we don't control the time that flush will take. 
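            // Condensed shape of the split measurement (the timer method names are
            // the ones introduced by this patch; the scaffolding around them is
            // illustrative):
            //
            //     let timer = metrics.start_smgr_op(op, received_at); // SLO clock starts
            //     let msg = handler(..).await?;                       // handler runs
            //     let flush_timer = timer.observe_smgr_op_completion_and_start_flushing();
            //     flush_timer.measure(pgb_writer.flush()).await?;     // flush time lands in
            //                                                         // a separate metric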
+ let flushing_timer = + timer.map(|timer| timer.observe_smgr_op_completion_and_start_flushing()); + + // what we want to do + let flush_fut = pgb_writer.flush(); + // metric for how long flushing takes + let flush_fut = match flushing_timer { + Some(flushing_timer) => { + futures::future::Either::Left(flushing_timer.measure(flush_fut)) + } + None => futures::future::Either::Right(flush_fut), + }; + // do it while respecting cancellation + let _: () = async move { + tokio::select! { + biased; + _ = cancel.cancelled() => { + // We were requested to shut down. + info!("shutdown request received in page handler"); + return Err(QueryError::Shutdown) + } + res = flush_fut => { + res?; + } + } + Ok(()) } + // and log the info! line inside the request span + .instrument(span.clone()) + .await?; } Ok(()) } @@ -1081,13 +1222,18 @@ impl PageServerHandler { Ok(msg) => msg, Err(e) => break e, }; - let msg = match msg { + let mut msg = match msg { Some(msg) => msg, None => { debug!("pagestream subprotocol end observed"); return ((pgb_reader, timeline_handles), Ok(())); } }; + + if let Err(cancelled) = msg.throttle(&self.cancel).await { + break cancelled; + } + let err = self .pagesteam_handle_batched_message(pgb_writer, msg, &cancel, ctx) .await; @@ -1245,12 +1391,13 @@ impl PageServerHandler { return Ok(()); } }; - let batch = match batch { + let mut batch = match batch { Ok(batch) => batch, Err(e) => { return Err(e); } }; + batch.throttle(&self.cancel).await?; self.pagesteam_handle_batched_message(pgb_writer, batch, &cancel, &ctx) .await?; } @@ -1423,10 +1570,6 @@ impl PageServerHandler { req: &PagestreamExistsRequest, ctx: &RequestContext, ) -> Result { - let _timer = timeline - .query_metrics - .start_timer(metrics::SmgrQueryType::GetRelExists, ctx); - let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( timeline, @@ -1453,10 +1596,6 @@ impl PageServerHandler { req: &PagestreamNblocksRequest, ctx: &RequestContext, ) -> Result { - let _timer = timeline - .query_metrics - .start_timer(metrics::SmgrQueryType::GetRelSize, ctx); - let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( timeline, @@ -1483,10 +1622,6 @@ impl PageServerHandler { req: &PagestreamDbSizeRequest, ctx: &RequestContext, ) -> Result { - let _timer = timeline - .query_metrics - .start_timer(metrics::SmgrQueryType::GetDbSize, ctx); - let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( timeline, @@ -1512,26 +1647,41 @@ impl PageServerHandler { &mut self, timeline: &Timeline, effective_lsn: Lsn, - pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>, + requests: smallvec::SmallVec<[(RelTag, BlockNumber, SmgrOpTimer); 1]>, ctx: &RequestContext, - ) -> Vec> { + ) -> Vec> { debug_assert_current_span_has_tenant_and_timeline_id(); - let _timer = timeline.query_metrics.start_timer_many( - metrics::SmgrQueryType::GetPageAtLsn, - pages.len(), - ctx, - ); - let pages = timeline - .get_rel_page_at_lsn_batched(pages, effective_lsn, ctx) - .await; + timeline + .query_metrics + .observe_getpage_batch_start(requests.len()); - Vec::from_iter(pages.into_iter().map(|page| { - page.map(|page| { - PagestreamBeMessage::GetPage(models::PagestreamGetPageResponse { page }) - }) - .map_err(PageStreamError::from) - })) + let results = timeline + .get_rel_page_at_lsn_batched( + requests.iter().map(|(reltag, blkno, _)| (reltag, blkno)), + effective_lsn, + ctx, + ) + .await; + assert_eq!(results.len(), 
requests.len()); + + // TODO: avoid creating the new Vec here + Vec::from_iter( + requests + .into_iter() + .zip(results.into_iter()) + .map(|((_, _, timer), res)| { + res.map(|page| { + ( + PagestreamBeMessage::GetPage(models::PagestreamGetPageResponse { + page, + }), + timer, + ) + }) + .map_err(PageStreamError::from) + }), + ) } #[instrument(skip_all, fields(shard_id))] @@ -1541,10 +1691,6 @@ impl PageServerHandler { req: &PagestreamGetSlruSegmentRequest, ctx: &RequestContext, ) -> Result { - let _timer = timeline - .query_metrics - .start_timer(metrics::SmgrQueryType::GetSlruSegment, ctx); - let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( timeline, @@ -2045,7 +2191,7 @@ where COMPUTE_COMMANDS_COUNTERS .for_command(ComputeCommandKind::Basebackup) .inc(); - let metric_recording = metrics::BASEBACKUP_QUERY_TIME.start_recording(&ctx); + let metric_recording = metrics::BASEBACKUP_QUERY_TIME.start_recording(); let res = async { self.handle_basebackup_request( pgb, diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index d48a1ba117fc..255bd01e259e 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -203,9 +203,13 @@ impl Timeline { ) -> Result { match version { Version::Lsn(effective_lsn) => { - let pages = smallvec::smallvec![(tag, blknum)]; + let pages: smallvec::SmallVec<[_; 1]> = smallvec::smallvec![(tag, blknum)]; let res = self - .get_rel_page_at_lsn_batched(pages, effective_lsn, ctx) + .get_rel_page_at_lsn_batched( + pages.iter().map(|(tag, blknum)| (tag, blknum)), + effective_lsn, + ctx, + ) .await; assert_eq!(res.len(), 1); res.into_iter().next().unwrap() @@ -240,7 +244,7 @@ impl Timeline { /// The ordering of the returned vec corresponds to the ordering of `pages`. 
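    /// For example (illustrative, condensed from the page_service caller):
    /// because results are positional, per-request state can be zipped back on:
    ///
    ///     let results = timeline
    ///         .get_rel_page_at_lsn_batched(reqs.iter().map(|(t, b, _)| (t, b)), lsn, ctx)
    ///         .await;
    ///     for ((_, _, timer), res) in reqs.into_iter().zip(results) { /* 1:1 pairing */ }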
pub(crate) async fn get_rel_page_at_lsn_batched( &self, - pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>, + pages: impl ExactSizeIterator, effective_lsn: Lsn, ctx: &RequestContext, ) -> Vec> { @@ -254,7 +258,7 @@ impl Timeline { let result_slots = result.spare_capacity_mut(); let mut keys_slots: BTreeMap> = BTreeMap::default(); - for (response_slot_idx, (tag, blknum)) in pages.into_iter().enumerate() { + for (response_slot_idx, (tag, blknum)) in pages.enumerate() { if tag.relnode == 0 { result_slots[response_slot_idx].write(Err(PageReconstructError::Other( RelationError::InvalidRelnode.into(), @@ -265,7 +269,7 @@ impl Timeline { } let nblocks = match self - .get_rel_size(tag, Version::Lsn(effective_lsn), ctx) + .get_rel_size(*tag, Version::Lsn(effective_lsn), ctx) .await { Ok(nblocks) => nblocks, @@ -276,7 +280,7 @@ impl Timeline { } }; - if blknum >= nblocks { + if *blknum >= nblocks { debug!( "read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page", tag, blknum, effective_lsn, nblocks @@ -286,7 +290,7 @@ impl Timeline { continue; } - let key = rel_block_to_key(tag, blknum); + let key = rel_block_to_key(*tag, *blknum); let key_slots = keys_slots.entry(key).or_default(); key_slots.push(response_slot_idx); @@ -526,6 +530,7 @@ impl Timeline { lsn: Lsn, ctx: &RequestContext, ) -> Result { + assert!(self.tenant_shard_id.is_shard_zero()); let n_blocks = self .get_slru_segment_size(kind, segno, Version::Lsn(lsn), ctx) .await?; @@ -548,6 +553,7 @@ impl Timeline { lsn: Lsn, ctx: &RequestContext, ) -> Result { + assert!(self.tenant_shard_id.is_shard_zero()); let key = slru_block_to_key(kind, segno, blknum); self.get(key, lsn, ctx).await } @@ -560,6 +566,7 @@ impl Timeline { version: Version<'_>, ctx: &RequestContext, ) -> Result { + assert!(self.tenant_shard_id.is_shard_zero()); let key = slru_segment_size_to_key(kind, segno); let mut buf = version.get(self, key, ctx).await?; Ok(buf.get_u32_le()) @@ -573,6 +580,7 @@ impl Timeline { version: Version<'_>, ctx: &RequestContext, ) -> Result { + assert!(self.tenant_shard_id.is_shard_zero()); // fetch directory listing let key = slru_dir_to_key(kind); let buf = version.get(self, key, ctx).await?; @@ -1043,26 +1051,28 @@ impl Timeline { } // Iterate SLRUs next - for kind in [ - SlruKind::Clog, - SlruKind::MultiXactMembers, - SlruKind::MultiXactOffsets, - ] { - let slrudir_key = slru_dir_to_key(kind); - result.add_key(slrudir_key); - let buf = self.get(slrudir_key, lsn, ctx).await?; - let dir = SlruSegmentDirectory::des(&buf)?; - let mut segments: Vec = dir.segments.iter().cloned().collect(); - segments.sort_unstable(); - for segno in segments { - let segsize_key = slru_segment_size_to_key(kind, segno); - let mut buf = self.get(segsize_key, lsn, ctx).await?; - let segsize = buf.get_u32_le(); - - result.add_range( - slru_block_to_key(kind, segno, 0)..slru_block_to_key(kind, segno, segsize), - ); - result.add_key(segsize_key); + if self.tenant_shard_id.is_shard_zero() { + for kind in [ + SlruKind::Clog, + SlruKind::MultiXactMembers, + SlruKind::MultiXactOffsets, + ] { + let slrudir_key = slru_dir_to_key(kind); + result.add_key(slrudir_key); + let buf = self.get(slrudir_key, lsn, ctx).await?; + let dir = SlruSegmentDirectory::des(&buf)?; + let mut segments: Vec = dir.segments.iter().cloned().collect(); + segments.sort_unstable(); + for segno in segments { + let segsize_key = slru_segment_size_to_key(kind, segno); + let mut buf = self.get(segsize_key, lsn, ctx).await?; + let segsize = buf.get_u32_le(); + + result.add_range( + 
slru_block_to_key(kind, segno, 0)..slru_block_to_key(kind, segno, segsize), + ); + result.add_key(segsize_key); + } } } @@ -1464,6 +1474,10 @@ impl<'a> DatadirModification<'a> { blknum: BlockNumber, rec: NeonWalRecord, ) -> anyhow::Result<()> { + if !self.tline.tenant_shard_id.is_shard_zero() { + return Ok(()); + } + self.put( slru_block_to_key(kind, segno, blknum), Value::WalRecord(rec), @@ -1497,6 +1511,8 @@ impl<'a> DatadirModification<'a> { blknum: BlockNumber, img: Bytes, ) -> anyhow::Result<()> { + assert!(self.tline.tenant_shard_id.is_shard_zero()); + let key = slru_block_to_key(kind, segno, blknum); if !key.is_valid_key_on_write_path() { anyhow::bail!( @@ -1538,6 +1554,7 @@ impl<'a> DatadirModification<'a> { segno: u32, blknum: BlockNumber, ) -> anyhow::Result<()> { + assert!(self.tline.tenant_shard_id.is_shard_zero()); let key = slru_block_to_key(kind, segno, blknum); if !key.is_valid_key_on_write_path() { anyhow::bail!( @@ -1849,6 +1866,8 @@ impl<'a> DatadirModification<'a> { nblocks: BlockNumber, ctx: &RequestContext, ) -> anyhow::Result<()> { + assert!(self.tline.tenant_shard_id.is_shard_zero()); + // Add it to the directory entry let dir_key = slru_dir_to_key(kind); let buf = self.get(dir_key, ctx).await?; @@ -1881,6 +1900,8 @@ impl<'a> DatadirModification<'a> { segno: u32, nblocks: BlockNumber, ) -> anyhow::Result<()> { + assert!(self.tline.tenant_shard_id.is_shard_zero()); + // Put size let size_key = slru_segment_size_to_key(kind, segno); let buf = nblocks.to_le_bytes(); diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index cd0690bb1a57..4a9c44aefdbc 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -37,14 +37,19 @@ use remote_timeline_client::manifest::{ }; use remote_timeline_client::UploadQueueNotReadyError; use std::collections::BTreeMap; +use std::collections::VecDeque; use std::fmt; use std::future::Future; use std::sync::atomic::AtomicBool; use std::sync::Weak; use std::time::SystemTime; use storage_broker::BrokerClientChannel; +use timeline::compaction::ScheduledCompactionTask; use timeline::import_pgdata; use timeline::offload::offload_timeline; +use timeline::CompactFlags; +use timeline::CompactOptions; +use timeline::CompactionError; use timeline::ShutdownMode; use tokio::io::BufReader; use tokio::sync::watch; @@ -339,6 +344,11 @@ pub struct Tenant { /// Overhead of mutex is acceptable because compaction is done with a multi-second period. compaction_circuit_breaker: std::sync::Mutex, + /// Scheduled compaction tasks. Currently, this can only be populated by triggering + /// a manual gc-compaction from the manual compaction API. + scheduled_compaction_tasks: + std::sync::Mutex>>, + /// If the tenant is in Activating state, notify this to encourage it /// to proceed to Active as soon as possible, rather than waiting for lazy /// background warmup. @@ -357,8 +367,8 @@ pub struct Tenant { /// Throttle applied at the top of [`Timeline::get`]. /// All [`Tenant::timelines`] of a given [`Tenant`] instance share the same [`throttle::Throttle`] instance. - pub(crate) timeline_get_throttle: - Arc>, + pub(crate) pagestream_throttle: + Arc>, /// An ongoing timeline detach concurrency limiter. 
/// @@ -1678,7 +1688,7 @@ impl Tenant { remote_metadata, TimelineResources { remote_client, - timeline_get_throttle: self.timeline_get_throttle.clone(), + pagestream_throttle: self.pagestream_throttle.clone(), l0_flush_global_state: self.l0_flush_global_state.clone(), }, LoadTimelineCause::Attach, @@ -2953,27 +2963,100 @@ impl Tenant { for (timeline_id, timeline, (can_compact, can_offload)) in &timelines_to_compact_or_offload { + // pending_task_left == None: cannot compact, maybe still pending tasks + // pending_task_left == Some(true): compaction task left + // pending_task_left == Some(false): no compaction task left let pending_task_left = if *can_compact { - Some( - timeline - .compact(cancel, EnumSet::empty(), ctx) - .instrument(info_span!("compact_timeline", %timeline_id)) - .await - .inspect_err(|e| match e { - timeline::CompactionError::ShuttingDown => (), - timeline::CompactionError::Offload(_) => { - // Failures to offload timelines do not trip the circuit breaker, because - // they do not do lots of writes the way compaction itself does: it is cheap - // to retry, and it would be bad to stop all compaction because of an issue with offloading. + let has_pending_l0_compaction_task = timeline + .compact(cancel, EnumSet::empty(), ctx) + .instrument(info_span!("compact_timeline", %timeline_id)) + .await + .inspect_err(|e| match e { + timeline::CompactionError::ShuttingDown => (), + timeline::CompactionError::Offload(_) => { + // Failures to offload timelines do not trip the circuit breaker, because + // they do not do lots of writes the way compaction itself does: it is cheap + // to retry, and it would be bad to stop all compaction because of an issue with offloading. + } + timeline::CompactionError::Other(e) => { + self.compaction_circuit_breaker + .lock() + .unwrap() + .fail(&CIRCUIT_BREAKERS_BROKEN, e); + } + })?; + if has_pending_l0_compaction_task { + Some(true) + } else { + let mut has_pending_scheduled_compaction_task; + let next_scheduled_compaction_task = { + let mut guard = self.scheduled_compaction_tasks.lock().unwrap(); + if let Some(tline_pending_tasks) = guard.get_mut(timeline_id) { + if !tline_pending_tasks.is_empty() { + info!( + "{} tasks left in the compaction schedule queue", + tline_pending_tasks.len() + ); } - timeline::CompactionError::Other(e) => { - self.compaction_circuit_breaker - .lock() - .unwrap() - .fail(&CIRCUIT_BREAKERS_BROKEN, e); + let next_task = tline_pending_tasks.pop_front(); + has_pending_scheduled_compaction_task = !tline_pending_tasks.is_empty(); + next_task + } else { + has_pending_scheduled_compaction_task = false; + None + } + }; + if let Some(mut next_scheduled_compaction_task) = next_scheduled_compaction_task + { + if !next_scheduled_compaction_task + .options + .flags + .contains(CompactFlags::EnhancedGcBottomMostCompaction) + { + warn!("ignoring scheduled compaction task: scheduled task must be gc compaction: {:?}", next_scheduled_compaction_task.options); + } else if next_scheduled_compaction_task.options.sub_compaction { + info!("running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"); + let jobs = timeline + .gc_compaction_split_jobs(next_scheduled_compaction_task.options) + .await + .map_err(CompactionError::Other)?; + if jobs.is_empty() { + info!("no jobs to run, skipping scheduled compaction task"); + } else { + has_pending_scheduled_compaction_task = true; + let jobs_len = jobs.len(); + let mut guard = self.scheduled_compaction_tasks.lock().unwrap(); + let tline_pending_tasks = 
guard.entry(*timeline_id).or_default(); + for (idx, job) in jobs.into_iter().enumerate() { + tline_pending_tasks.push_back(ScheduledCompactionTask { + options: job, + result_tx: if idx == jobs_len - 1 { + // The last compaction job sends the completion signal + next_scheduled_compaction_task.result_tx.take() + } else { + None + }, + }); + } + info!("scheduled enhanced gc bottom-most compaction with sub-compaction, split into {} jobs", jobs_len); } - })?, - ) + } else { + let _ = timeline + .compact_with_options( + cancel, + next_scheduled_compaction_task.options, + ctx, + ) + .instrument(info_span!("scheduled_compact_timeline", %timeline_id)) + .await?; + if let Some(tx) = next_scheduled_compaction_task.result_tx.take() { + // TODO: we can send compaction statistics in the future + tx.send(()).ok(); + } + } + } + Some(has_pending_scheduled_compaction_task) + } } else { None }; @@ -2993,6 +3076,36 @@ impl Tenant { Ok(has_pending_task) } + /// Cancel scheduled compaction tasks + pub(crate) fn cancel_scheduled_compaction( + &self, + timeline_id: TimelineId, + ) -> Vec { + let mut guard = self.scheduled_compaction_tasks.lock().unwrap(); + if let Some(tline_pending_tasks) = guard.get_mut(&timeline_id) { + let current_tline_pending_tasks = std::mem::take(tline_pending_tasks); + current_tline_pending_tasks.into_iter().collect() + } else { + Vec::new() + } + } + + /// Schedule a compaction task for a timeline. + pub(crate) async fn schedule_compaction( + &self, + timeline_id: TimelineId, + options: CompactOptions, + ) -> tokio::sync::oneshot::Receiver<()> { + let (tx, rx) = tokio::sync::oneshot::channel(); + let mut guard = self.scheduled_compaction_tasks.lock().unwrap(); + let tline_pending_tasks = guard.entry(timeline_id).or_default(); + tline_pending_tasks.push_back(ScheduledCompactionTask { + options, + result_tx: Some(tx), + }); + rx + } + // Call through to all timelines to freeze ephemeral layers if needed. Usually // this happens during ingest: this background housekeeping is for freezing layers // that are open but haven't been written to for some time. @@ -3422,7 +3535,7 @@ impl Tenant { r.map_err( |_e: tokio::sync::watch::error::RecvError| // Tenant existed but was dropped: report it as non-existent - GetActiveTenantError::NotFound(GetTenantError::NotFound(self.tenant_shard_id.tenant_id)) + GetActiveTenantError::NotFound(GetTenantError::ShardNotFound(self.tenant_shard_id)) )? } Err(TimeoutCancellableError::Cancelled) => { @@ -3835,7 +3948,7 @@ impl Tenant { } } - fn get_timeline_get_throttle_config( + fn get_pagestream_throttle_config( psconf: &'static PageServerConf, overrides: &TenantConfOpt, ) -> throttle::Config { @@ -3846,8 +3959,8 @@ impl Tenant { } pub(crate) fn tenant_conf_updated(&self, new_conf: &TenantConfOpt) { - let conf = Self::get_timeline_get_throttle_config(self.conf, new_conf); - self.timeline_get_throttle.reconfigure(conf) + let conf = Self::get_pagestream_throttle_config(self.conf, new_conf); + self.pagestream_throttle.reconfigure(conf) } /// Helper function to create a new Timeline struct. @@ -4005,13 +4118,14 @@ impl Tenant { // use an extremely long backoff. 
Some(Duration::from_secs(3600 * 24)), )), + scheduled_compaction_tasks: Mutex::new(Default::default()), activate_now_sem: tokio::sync::Semaphore::new(0), attach_wal_lag_cooldown: Arc::new(std::sync::OnceLock::new()), cancel: CancellationToken::default(), gate: Gate::default(), - timeline_get_throttle: Arc::new(throttle::Throttle::new( - Tenant::get_timeline_get_throttle_config(conf, &attached_conf.tenant_conf), - crate::metrics::tenant_throttling::TimelineGet::new(&tenant_shard_id), + pagestream_throttle: Arc::new(throttle::Throttle::new( + Tenant::get_pagestream_throttle_config(conf, &attached_conf.tenant_conf), + crate::metrics::tenant_throttling::Metrics::new(&tenant_shard_id), )), tenant_conf: Arc::new(ArcSwap::from_pointee(attached_conf)), ongoing_timeline_detach: std::sync::Mutex::default(), @@ -4909,7 +5023,7 @@ impl Tenant { fn build_timeline_resources(&self, timeline_id: TimelineId) -> TimelineResources { TimelineResources { remote_client: self.build_timeline_remote_client(timeline_id), - timeline_get_throttle: self.timeline_get_throttle.clone(), + pagestream_throttle: self.pagestream_throttle.clone(), l0_flush_global_state: self.l0_flush_global_state.clone(), } } @@ -9163,6 +9277,7 @@ mod tests { CompactOptions { flags: dryrun_flags, compact_range: None, + ..Default::default() }, &ctx, ) @@ -9399,6 +9514,7 @@ mod tests { CompactOptions { flags: dryrun_flags, compact_range: None, + ..Default::default() }, &ctx, ) @@ -9885,7 +10001,15 @@ mod tests { // Do a partial compaction on key range 0..2 tline - .partial_compact_with_gc(get_key(0)..get_key(2), &cancel, EnumSet::new(), &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: EnumSet::new(), + compact_range: Some((get_key(0)..get_key(2)).into()), + ..Default::default() + }, + &ctx, + ) .await .unwrap(); let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; @@ -9924,7 +10048,15 @@ mod tests { // Do a partial compaction on key range 2..4 tline - .partial_compact_with_gc(get_key(2)..get_key(4), &cancel, EnumSet::new(), &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: EnumSet::new(), + compact_range: Some((get_key(2)..get_key(4)).into()), + ..Default::default() + }, + &ctx, + ) .await .unwrap(); let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; @@ -9968,7 +10100,15 @@ mod tests { // Do a partial compaction on key range 4..9 tline - .partial_compact_with_gc(get_key(4)..get_key(9), &cancel, EnumSet::new(), &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: EnumSet::new(), + compact_range: Some((get_key(4)..get_key(9)).into()), + ..Default::default() + }, + &ctx, + ) .await .unwrap(); let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; @@ -10011,7 +10151,15 @@ mod tests { // Do a partial compaction on key range 9..10 tline - .partial_compact_with_gc(get_key(9)..get_key(10), &cancel, EnumSet::new(), &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: EnumSet::new(), + compact_range: Some((get_key(9)..get_key(10)).into()), + ..Default::default() + }, + &ctx, + ) .await .unwrap(); let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; @@ -10059,7 +10207,15 @@ mod tests { // Do a partial compaction on key range 0..10, all image layers below LSN 20 can be replaced with new ones. 
tline - .partial_compact_with_gc(get_key(0)..get_key(10), &cancel, EnumSet::new(), &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: EnumSet::new(), + compact_range: Some((get_key(0)..get_key(10)).into()), + ..Default::default() + }, + &ctx, + ) .await .unwrap(); let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; diff --git a/pageserver/src/tenant/ephemeral_file.rs b/pageserver/src/tenant/ephemeral_file.rs index de0abab4c0c7..aaec8a4c313a 100644 --- a/pageserver/src/tenant/ephemeral_file.rs +++ b/pageserver/src/tenant/ephemeral_file.rs @@ -8,10 +8,8 @@ use crate::page_cache; use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File; use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut; use crate::virtual_file::owned_buffers_io::slice::SliceMutExt; -use crate::virtual_file::owned_buffers_io::util::size_tracking_writer; use crate::virtual_file::owned_buffers_io::write::Buffer; use crate::virtual_file::{self, owned_buffers_io, IoBufferMut, VirtualFile}; -use bytes::BytesMut; use camino::Utf8PathBuf; use num_traits::Num; use pageserver_api::shard::TenantShardId; @@ -20,6 +18,7 @@ use tracing::error; use std::io; use std::sync::atomic::AtomicU64; +use std::sync::Arc; use utils::id::TimelineId; pub struct EphemeralFile { @@ -27,10 +26,7 @@ pub struct EphemeralFile { _timeline_id: TimelineId, page_cache_file_id: page_cache::FileId, bytes_written: u64, - buffered_writer: owned_buffers_io::write::BufferedWriter< - BytesMut, - size_tracking_writer::Writer, - >, + buffered_writer: owned_buffers_io::write::BufferedWriter, /// Gate guard is held on as long as we need to do operations in the path (delete on drop) _gate_guard: utils::sync::gate::GateGuard, } @@ -42,9 +38,9 @@ impl EphemeralFile { conf: &PageServerConf, tenant_shard_id: TenantShardId, timeline_id: TimelineId, - gate_guard: utils::sync::gate::GateGuard, + gate: &utils::sync::gate::Gate, ctx: &RequestContext, - ) -> Result { + ) -> anyhow::Result { static NEXT_FILENAME: AtomicU64 = AtomicU64::new(1); let filename_disambiguator = NEXT_FILENAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed); @@ -55,15 +51,17 @@ impl EphemeralFile { "ephemeral-{filename_disambiguator}" ))); - let file = VirtualFile::open_with_options( - &filename, - virtual_file::OpenOptions::new() - .read(true) - .write(true) - .create(true), - ctx, - ) - .await?; + let file = Arc::new( + VirtualFile::open_with_options_v2( + &filename, + virtual_file::OpenOptions::new() + .read(true) + .write(true) + .create(true), + ctx, + ) + .await?, + ); let page_cache_file_id = page_cache::next_file_id(); // XXX get rid, we're not page-caching anymore @@ -73,10 +71,12 @@ impl EphemeralFile { page_cache_file_id, bytes_written: 0, buffered_writer: owned_buffers_io::write::BufferedWriter::new( - size_tracking_writer::Writer::new(file), - BytesMut::with_capacity(TAIL_SZ), + file, + || IoBufferMut::with_capacity(TAIL_SZ), + gate.enter()?, + ctx, ), - _gate_guard: gate_guard, + _gate_guard: gate.enter()?, }) } } @@ -85,7 +85,7 @@ impl Drop for EphemeralFile { fn drop(&mut self) { // unlink the file // we are clear to do this, because we have entered a gate - let path = self.buffered_writer.as_inner().as_inner().path(); + let path = self.buffered_writer.as_inner().path(); let res = std::fs::remove_file(path); if let Err(e) = res { if e.kind() != std::io::ErrorKind::NotFound { @@ -132,6 +132,18 @@ impl EphemeralFile { srcbuf: &[u8], ctx: &RequestContext, ) -> std::io::Result { + let (pos, control) = 
self.write_raw_controlled(srcbuf, ctx).await?; + if let Some(control) = control { + control.release().await; + } + Ok(pos) + } + + async fn write_raw_controlled( + &mut self, + srcbuf: &[u8], + ctx: &RequestContext, + ) -> std::io::Result<(u64, Option)> { let pos = self.bytes_written; let new_bytes_written = pos.checked_add(srcbuf.len().into_u64()).ok_or_else(|| { @@ -145,9 +157,9 @@ impl EphemeralFile { })?; // Write the payload - let nwritten = self + let (nwritten, control) = self .buffered_writer - .write_buffered_borrowed(srcbuf, ctx) + .write_buffered_borrowed_controlled(srcbuf, ctx) .await?; assert_eq!( nwritten, @@ -157,7 +169,7 @@ impl EphemeralFile { self.bytes_written = new_bytes_written; - Ok(pos) + Ok((pos, control)) } } @@ -168,11 +180,12 @@ impl super::storage_layer::inmemory_layer::vectored_dio_read::File for Ephemeral dst: tokio_epoll_uring::Slice, ctx: &'a RequestContext, ) -> std::io::Result<(tokio_epoll_uring::Slice, usize)> { - let file_size_tracking_writer = self.buffered_writer.as_inner(); - let flushed_offset = file_size_tracking_writer.bytes_written(); + let submitted_offset = self.buffered_writer.bytes_submitted(); + + let mutable = self.buffered_writer.inspect_mutable(); + let mutable = &mutable[0..mutable.pending()]; - let buffer = self.buffered_writer.inspect_buffer(); - let buffered = &buffer[0..buffer.pending()]; + let maybe_flushed = self.buffered_writer.inspect_maybe_flushed(); let dst_cap = dst.bytes_total().into_u64(); let end = { @@ -197,11 +210,42 @@ impl super::storage_layer::inmemory_layer::vectored_dio_read::File for Ephemeral } } } - let written_range = Range(start, std::cmp::min(end, flushed_offset)); - let buffered_range = Range(std::cmp::max(start, flushed_offset), end); + + let (written_range, maybe_flushed_range) = { + if maybe_flushed.is_some() { + // [ written ][ maybe_flushed ][ mutable ] + // <- TAIL_SZ -><- TAIL_SZ -> + // ^ + // `submitted_offset` + // <++++++ on disk +++++++????????????????> + ( + Range( + start, + std::cmp::min(end, submitted_offset.saturating_sub(TAIL_SZ as u64)), + ), + Range( + std::cmp::max(start, submitted_offset.saturating_sub(TAIL_SZ as u64)), + std::cmp::min(end, submitted_offset), + ), + ) + } else { + // [ written ][ mutable ] + // <- TAIL_SZ -> + // ^ + // `submitted_offset` + // <++++++ on disk +++++++++++++++++++++++> + ( + Range(start, std::cmp::min(end, submitted_offset)), + // zero len + Range(submitted_offset, u64::MIN), + ) + } + }; + + let mutable_range = Range(std::cmp::max(start, submitted_offset), end); let dst = if written_range.len() > 0 { - let file: &VirtualFile = file_size_tracking_writer.as_inner(); + let file: &VirtualFile = self.buffered_writer.as_inner(); let bounds = dst.bounds(); let slice = file .read_exact_at(dst.slice(0..written_range.len().into_usize()), start, ctx) @@ -211,19 +255,21 @@ impl super::storage_layer::inmemory_layer::vectored_dio_read::File for Ephemeral dst }; - let dst = if buffered_range.len() > 0 { - let offset_in_buffer = buffered_range + let dst = if maybe_flushed_range.len() > 0 { + let offset_in_buffer = maybe_flushed_range .0 - .checked_sub(flushed_offset) + .checked_sub(submitted_offset.saturating_sub(TAIL_SZ as u64)) .unwrap() .into_usize(); - let to_copy = - &buffered[offset_in_buffer..(offset_in_buffer + buffered_range.len().into_usize())]; + // Checked previously the buffer is Some. 
+ let maybe_flushed = maybe_flushed.unwrap(); + let to_copy = &maybe_flushed + [offset_in_buffer..(offset_in_buffer + maybe_flushed_range.len().into_usize())]; let bounds = dst.bounds(); let mut view = dst.slice({ let start = written_range.len().into_usize(); let end = start - .checked_add(buffered_range.len().into_usize()) + .checked_add(maybe_flushed_range.len().into_usize()) .unwrap(); start..end }); @@ -234,6 +280,28 @@ impl super::storage_layer::inmemory_layer::vectored_dio_read::File for Ephemeral dst }; + let dst = if mutable_range.len() > 0 { + let offset_in_buffer = mutable_range + .0 + .checked_sub(submitted_offset) + .unwrap() + .into_usize(); + let to_copy = + &mutable[offset_in_buffer..(offset_in_buffer + mutable_range.len().into_usize())]; + let bounds = dst.bounds(); + let mut view = dst.slice({ + let start = + written_range.len().into_usize() + maybe_flushed_range.len().into_usize(); + let end = start.checked_add(mutable_range.len().into_usize()).unwrap(); + start..end + }); + view.as_mut_rust_slice_full_zeroed() + .copy_from_slice(to_copy); + Slice::from_buf_bounds(Slice::into_inner(view), bounds) + } else { + dst + }; + // TODO: in debug mode, randomize the remaining bytes in `dst` to catch bugs Ok((dst, (end - start).into_usize())) @@ -295,7 +363,7 @@ mod tests { let gate = utils::sync::gate::Gate::default(); - let file = EphemeralFile::create(conf, tenant_id, timeline_id, gate.enter().unwrap(), &ctx) + let file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &ctx) .await .unwrap(); @@ -326,14 +394,15 @@ mod tests { let gate = utils::sync::gate::Gate::default(); - let mut file = - EphemeralFile::create(conf, tenant_id, timeline_id, gate.enter().unwrap(), &ctx) - .await - .unwrap(); + let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &ctx) + .await + .unwrap(); - let cap = file.buffered_writer.inspect_buffer().capacity(); + let mutable = file.buffered_writer.inspect_mutable(); + let cap = mutable.capacity(); + let align = mutable.align(); - let write_nbytes = cap + cap / 2; + let write_nbytes = cap * 2 + cap / 2; let content: Vec = rand::thread_rng() .sample_iter(rand::distributions::Standard) @@ -341,30 +410,39 @@ mod tests { .collect(); let mut value_offsets = Vec::new(); - for i in 0..write_nbytes { - let off = file.write_raw(&content[i..i + 1], &ctx).await.unwrap(); + for range in (0..write_nbytes) + .step_by(align) + .map(|start| start..(start + align).min(write_nbytes)) + { + let off = file.write_raw(&content[range], &ctx).await.unwrap(); value_offsets.push(off); } - assert!(file.len() as usize == write_nbytes); - for i in 0..write_nbytes { - assert_eq!(value_offsets[i], i.into_u64()); - let buf = IoBufferMut::with_capacity(1); + assert_eq!(file.len() as usize, write_nbytes); + for (i, range) in (0..write_nbytes) + .step_by(align) + .map(|start| start..(start + align).min(write_nbytes)) + .enumerate() + { + assert_eq!(value_offsets[i], range.start.into_u64()); + let buf = IoBufferMut::with_capacity(range.len()); let (buf_slice, nread) = file - .read_exact_at_eof_ok(i.into_u64(), buf.slice_full(), &ctx) + .read_exact_at_eof_ok(range.start.into_u64(), buf.slice_full(), &ctx) .await .unwrap(); let buf = buf_slice.into_inner(); - assert_eq!(nread, 1); - assert_eq!(&buf, &content[i..i + 1]); + assert_eq!(nread, range.len()); + assert_eq!(&buf, &content[range]); } - let file_contents = - std::fs::read(file.buffered_writer.as_inner().as_inner().path()).unwrap(); - assert_eq!(file_contents, &content[0..cap]); + let file_contents = 
std::fs::read(file.buffered_writer.as_inner().path()).unwrap(); + assert!(file_contents == content[0..cap * 2]); + + let maybe_flushed_buffer_contents = file.buffered_writer.inspect_maybe_flushed().unwrap(); + assert_eq!(&maybe_flushed_buffer_contents[..], &content[cap..cap * 2]); - let buffer_contents = file.buffered_writer.inspect_buffer(); - assert_eq!(buffer_contents, &content[cap..write_nbytes]); + let mutable_buffer_contents = file.buffered_writer.inspect_mutable(); + assert_eq!(mutable_buffer_contents, &content[cap * 2..write_nbytes]); } #[tokio::test] @@ -373,16 +451,16 @@ mod tests { let gate = utils::sync::gate::Gate::default(); - let mut file = - EphemeralFile::create(conf, tenant_id, timeline_id, gate.enter().unwrap(), &ctx) - .await - .unwrap(); + let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &ctx) + .await + .unwrap(); - let cap = file.buffered_writer.inspect_buffer().capacity(); + // mutable buffer and maybe_flushed buffer each has `cap` bytes. + let cap = file.buffered_writer.inspect_mutable().capacity(); let content: Vec = rand::thread_rng() .sample_iter(rand::distributions::Standard) - .take(cap + cap / 2) + .take(cap * 2 + cap / 2) .collect(); file.write_raw(&content, &ctx).await.unwrap(); @@ -390,23 +468,21 @@ mod tests { // assert the state is as this test expects it to be assert_eq!( &file.load_to_io_buf(&ctx).await.unwrap(), - &content[0..cap + cap / 2] + &content[0..cap * 2 + cap / 2] ); - let md = file - .buffered_writer - .as_inner() - .as_inner() - .path() - .metadata() - .unwrap(); + let md = file.buffered_writer.as_inner().path().metadata().unwrap(); assert_eq!( md.len(), - cap.into_u64(), - "buffered writer does one write if we write 1.5x buffer capacity" + 2 * cap.into_u64(), + "buffered writer requires one write to be flushed if we write 2.5x buffer capacity" + ); + assert_eq!( + &file.buffered_writer.inspect_maybe_flushed().unwrap()[0..cap], + &content[cap..cap * 2] ); assert_eq!( - &file.buffered_writer.inspect_buffer()[0..cap / 2], - &content[cap..cap + cap / 2] + &file.buffered_writer.inspect_mutable()[0..cap / 2], + &content[cap * 2..cap * 2 + cap / 2] ); } @@ -422,19 +498,19 @@ mod tests { let gate = utils::sync::gate::Gate::default(); - let mut file = - EphemeralFile::create(conf, tenant_id, timeline_id, gate.enter().unwrap(), &ctx) - .await - .unwrap(); - - let cap = file.buffered_writer.inspect_buffer().capacity(); + let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &ctx) + .await + .unwrap(); + let mutable = file.buffered_writer.inspect_mutable(); + let cap = mutable.capacity(); + let align = mutable.align(); let content: Vec = rand::thread_rng() .sample_iter(rand::distributions::Standard) - .take(cap + cap / 2) + .take(cap * 2 + cap / 2) .collect(); - file.write_raw(&content, &ctx).await.unwrap(); + let (_, control) = file.write_raw_controlled(&content, &ctx).await.unwrap(); let test_read = |start: usize, len: usize| { let file = &file; @@ -454,16 +530,38 @@ mod tests { } }; + let test_read_all_offset_combinations = || { + async move { + test_read(align, align).await; + // border onto edge of file + test_read(cap - align, align).await; + // read across file and buffer + test_read(cap - align, 2 * align).await; + // stay from start of maybe flushed buffer + test_read(cap, align).await; + // completely within maybe flushed buffer + test_read(cap + align, align).await; + // border onto edge of maybe flushed buffer. 
+ test_read(cap * 2 - align, align).await; + // read across maybe flushed and mutable buffer + test_read(cap * 2 - align, 2 * align).await; + // read across three segments + test_read(cap - align, cap + 2 * align).await; + // completely within mutable buffer + test_read(cap * 2 + align, align).await; + } + }; + // completely within the file range - assert!(20 < cap, "test assumption"); - test_read(10, 10).await; - // border onto edge of file - test_read(cap - 10, 10).await; - // read across file and buffer - test_read(cap - 10, 20).await; - // stay from start of buffer - test_read(cap, 10).await; - // completely within buffer - test_read(cap + 10, 10).await; + assert!(align < cap, "test assumption"); + assert!(cap % align == 0); + + // test reads at different flush stages. + let not_started = control.unwrap().into_not_started(); + test_read_all_offset_combinations().await; + let in_progress = not_started.ready_to_flush(); + test_read_all_offset_combinations().await; + in_progress.wait_until_flush_is_done().await; + test_read_all_offset_combinations().await; } } diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index eb8191e43e1c..e8b0d1d4dd64 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -347,7 +347,7 @@ async fn init_load_generations( ); emergency_generations(tenant_confs) } else if let Some(client) = ControllerUpcallClient::new(conf, cancel) { - info!("Calling control plane API to re-attach tenants"); + info!("Calling {} API to re-attach tenants", client.base_url()); // If we are configured to use the control plane API, then it is the source of truth for what tenants to load. match client.re_attach(conf).await { Ok(tenants) => tenants @@ -894,7 +894,7 @@ impl TenantManager { Some(TenantSlot::Attached(tenant)) => Ok(Arc::clone(tenant)), Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_shard_id)), None | Some(TenantSlot::Secondary(_)) => { - Err(GetTenantError::NotFound(tenant_shard_id.tenant_id)) + Err(GetTenantError::ShardNotFound(tenant_shard_id)) } } } @@ -2258,6 +2258,9 @@ pub(crate) enum GetTenantError { #[error("Tenant {0} not found")] NotFound(TenantId), + #[error("Tenant {0} not found")] + ShardNotFound(TenantShardId), + #[error("Tenant {0} is not active")] NotActive(TenantShardId), diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 007bd3eef083..20e0536a00e5 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -681,6 +681,7 @@ impl RemoteTimelineClient { layer_file_name: &LayerName, layer_metadata: &LayerFileMetadata, local_path: &Utf8Path, + gate: &utils::sync::gate::Gate, cancel: &CancellationToken, ctx: &RequestContext, ) -> Result { @@ -700,6 +701,7 @@ impl RemoteTimelineClient { layer_file_name, layer_metadata, local_path, + gate, cancel, ctx, ) @@ -2190,6 +2192,9 @@ impl RemoteTimelineClient { upload_queue.clean.1 = Some(task.task_id); let lsn = upload_queue.clean.0.metadata.disk_consistent_lsn(); + self.metrics + .projected_remote_consistent_lsn_gauge + .set(lsn.0); if self.generation.is_none() { // Legacy mode: skip validating generation @@ -2564,9 +2569,9 @@ pub fn parse_remote_index_path(path: RemotePath) -> Option { } /// Given the key of a tenant manifest, parse out the generation number -pub(crate) fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option { +pub fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option { static RE: OnceLock = 
OnceLock::new(); - let re = RE.get_or_init(|| Regex::new(r".+tenant-manifest-([0-9a-f]{8}).json").unwrap()); + let re = RE.get_or_init(|| Regex::new(r".*tenant-manifest-([0-9a-f]{8}).json").unwrap()); re.captures(path.get_path().as_str()) .and_then(|c| c.get(1)) .and_then(|m| Generation::parse_suffix(m.as_str())) diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index 739615be9cef..d15f161fb6da 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -26,8 +26,6 @@ use crate::span::{ use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path}; use crate::tenant::storage_layer::LayerName; use crate::tenant::Generation; -#[cfg_attr(target_os = "macos", allow(unused_imports))] -use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt; use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile}; use crate::TEMP_FILE_SUFFIX; use remote_storage::{ @@ -60,6 +58,7 @@ pub async fn download_layer_file<'a>( layer_file_name: &'a LayerName, layer_metadata: &'a LayerFileMetadata, local_path: &Utf8Path, + gate: &utils::sync::gate::Gate, cancel: &CancellationToken, ctx: &RequestContext, ) -> Result { @@ -88,7 +87,9 @@ pub async fn download_layer_file<'a>( let temp_file_path = path_with_suffix_extension(local_path, TEMP_DOWNLOAD_EXTENSION); let bytes_amount = download_retry( - || async { download_object(storage, &remote_path, &temp_file_path, cancel, ctx).await }, + || async { + download_object(storage, &remote_path, &temp_file_path, gate, cancel, ctx).await + }, &format!("download {remote_path:?}"), cancel, ) @@ -148,6 +149,7 @@ async fn download_object<'a>( storage: &'a GenericRemoteStorage, src_path: &RemotePath, dst_path: &Utf8PathBuf, + #[cfg_attr(target_os = "macos", allow(unused_variables))] gate: &utils::sync::gate::Gate, cancel: &CancellationToken, #[cfg_attr(target_os = "macos", allow(unused_variables))] ctx: &RequestContext, ) -> Result { @@ -205,13 +207,18 @@ async fn download_object<'a>( } #[cfg(target_os = "linux")] crate::virtual_file::io_engine::IoEngine::TokioEpollUring => { - use crate::virtual_file::owned_buffers_io::{self, util::size_tracking_writer}; - use bytes::BytesMut; + use crate::virtual_file::owned_buffers_io; + use crate::virtual_file::IoBufferMut; + use std::sync::Arc; async { - let destination_file = VirtualFile::create(dst_path, ctx) - .await - .with_context(|| format!("create a destination file for layer '{dst_path}'")) - .map_err(DownloadError::Other)?; + let destination_file = Arc::new( + VirtualFile::create(dst_path, ctx) + .await + .with_context(|| { + format!("create a destination file for layer '{dst_path}'") + }) + .map_err(DownloadError::Other)?, + ); let mut download = storage .download(src_path, &DownloadOpts::default(), cancel) @@ -219,14 +226,16 @@ async fn download_object<'a>( pausable_failpoint!("before-downloading-layer-stream-pausable"); + let mut buffered = owned_buffers_io::write::BufferedWriter::::new( + destination_file, + || IoBufferMut::with_capacity(super::BUFFER_SIZE), + gate.enter().map_err(|_| DownloadError::Cancelled)?, + ctx, + ); + // TODO: use vectored write (writev) once supported by tokio-epoll-uring. // There's chunks_vectored() on the stream. 
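                // Shape of the new double-buffered writer set up above (sketch;
                // generics elided): writes go into a mutable buffer while the
                // previously filled buffer may still be flushing in the background,
                // which is why the file handle is shared via `Arc` and a gate guard
                // ties the flush task to shutdown.
                //
                //     let mut buffered = BufferedWriter::<IoBufferMut, _>::new(
                //         Arc::new(file),                              // shared with flush task
                //         || IoBufferMut::with_capacity(BUFFER_SIZE),  // buffer factory
                //         gate.enter()?,                               // flush can't outlive gate
                //         ctx,
                //     );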
let (bytes_amount, destination_file) = async { - let size_tracking = size_tracking_writer::Writer::new(destination_file); - let mut buffered = owned_buffers_io::write::BufferedWriter::::new( - size_tracking, - BytesMut::with_capacity(super::BUFFER_SIZE), - ); while let Some(res) = futures::StreamExt::next(&mut download.download_stream).await { @@ -234,10 +243,10 @@ async fn download_object<'a>( Ok(chunk) => chunk, Err(e) => return Err(e), }; - buffered.write_buffered(chunk.slice_len(), ctx).await?; + buffered.write_buffered_borrowed(&chunk, ctx).await?; } - let size_tracking = buffered.flush_and_into_inner(ctx).await?; - Ok(size_tracking.into_inner()) + let inner = buffered.flush_and_into_inner(ctx).await?; + Ok(inner) } .await?; diff --git a/pageserver/src/tenant/remote_timeline_client/manifest.rs b/pageserver/src/tenant/remote_timeline_client/manifest.rs index c4382cb6480f..2029847a1249 100644 --- a/pageserver/src/tenant/remote_timeline_client/manifest.rs +++ b/pageserver/src/tenant/remote_timeline_client/manifest.rs @@ -43,7 +43,7 @@ impl TenantManifest { offloaded_timelines: vec![], } } - pub(crate) fn from_json_bytes(bytes: &[u8]) -> Result { + pub fn from_json_bytes(bytes: &[u8]) -> Result { serde_json::from_slice::(bytes) } diff --git a/pageserver/src/tenant/secondary.rs b/pageserver/src/tenant/secondary.rs index 3df89a928cb2..4bc208331b35 100644 --- a/pageserver/src/tenant/secondary.rs +++ b/pageserver/src/tenant/secondary.rs @@ -22,6 +22,7 @@ use super::{ mgr::TenantManager, span::debug_assert_current_span_has_tenant_id, storage_layer::LayerName, + GetTenantError, }; use crate::metrics::SECONDARY_RESIDENT_PHYSICAL_SIZE; @@ -66,7 +67,21 @@ struct CommandRequest { } struct CommandResponse { - result: anyhow::Result<()>, + result: Result<(), SecondaryTenantError>, +} + +#[derive(thiserror::Error, Debug)] +pub(crate) enum SecondaryTenantError { + #[error("{0}")] + GetTenant(GetTenantError), + #[error("shutting down")] + ShuttingDown, +} + +impl From for SecondaryTenantError { + fn from(gte: GetTenantError) -> Self { + Self::GetTenant(gte) + } } // Whereas [`Tenant`] represents an attached tenant, this type represents the work @@ -285,7 +300,7 @@ impl SecondaryController { &self, queue: &tokio::sync::mpsc::Sender>, payload: T, - ) -> anyhow::Result<()> { + ) -> Result<(), SecondaryTenantError> { let (response_tx, response_rx) = tokio::sync::oneshot::channel(); queue @@ -294,20 +309,26 @@ impl SecondaryController { response_tx, }) .await - .map_err(|_| anyhow::anyhow!("Receiver shut down"))?; + .map_err(|_| SecondaryTenantError::ShuttingDown)?; let response = response_rx .await - .map_err(|_| anyhow::anyhow!("Request dropped"))?; + .map_err(|_| SecondaryTenantError::ShuttingDown)?; response.result } - pub async fn upload_tenant(&self, tenant_shard_id: TenantShardId) -> anyhow::Result<()> { + pub(crate) async fn upload_tenant( + &self, + tenant_shard_id: TenantShardId, + ) -> Result<(), SecondaryTenantError> { self.dispatch(&self.upload_req_tx, UploadCommand::Upload(tenant_shard_id)) .await } - pub async fn download_tenant(&self, tenant_shard_id: TenantShardId) -> anyhow::Result<()> { + pub(crate) async fn download_tenant( + &self, + tenant_shard_id: TenantShardId, + ) -> Result<(), SecondaryTenantError> { self.dispatch( &self.download_req_tx, DownloadCommand::Download(tenant_shard_id), diff --git a/pageserver/src/tenant/secondary/downloader.rs b/pageserver/src/tenant/secondary/downloader.rs index 8d771dc40535..395e34e404c4 100644 --- a/pageserver/src/tenant/secondary/downloader.rs +++ 
b/pageserver/src/tenant/secondary/downloader.rs @@ -35,7 +35,7 @@ use super::{ self, period_jitter, period_warmup, Completion, JobGenerator, SchedulingResult, TenantBackgroundJobs, }, - SecondaryTenant, + GetTenantError, SecondaryTenant, SecondaryTenantError, }; use crate::tenant::{ @@ -470,15 +470,16 @@ impl JobGenerator anyhow::Result { + fn on_command( + &mut self, + command: DownloadCommand, + ) -> Result { let tenant_shard_id = command.get_tenant_shard_id(); let tenant = self .tenant_manager - .get_secondary_tenant_shard(*tenant_shard_id); - let Some(tenant) = tenant else { - return Err(anyhow::anyhow!("Not found or not in Secondary mode")); - }; + .get_secondary_tenant_shard(*tenant_shard_id) + .ok_or(GetTenantError::ShardNotFound(*tenant_shard_id))?; Ok(PendingDownload { target_time: None, @@ -1182,6 +1183,7 @@ impl<'a> TenantDownloader<'a> { &layer.name, &layer.metadata, &local_path, + &self.secondary_state.gate, &self.secondary_state.cancel, ctx, ) diff --git a/pageserver/src/tenant/secondary/heatmap_uploader.rs b/pageserver/src/tenant/secondary/heatmap_uploader.rs index e680fd705b42..c5e5e0494571 100644 --- a/pageserver/src/tenant/secondary/heatmap_uploader.rs +++ b/pageserver/src/tenant/secondary/heatmap_uploader.rs @@ -28,7 +28,7 @@ use super::{ self, period_jitter, period_warmup, JobGenerator, RunningJob, SchedulingResult, TenantBackgroundJobs, }, - CommandRequest, UploadCommand, + CommandRequest, SecondaryTenantError, UploadCommand, }; use tokio_util::sync::CancellationToken; use tracing::{info_span, instrument, Instrument}; @@ -279,7 +279,10 @@ impl JobGenerator }.instrument(info_span!(parent: None, "heatmap_upload", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug())))) } - fn on_command(&mut self, command: UploadCommand) -> anyhow::Result { + fn on_command( + &mut self, + command: UploadCommand, + ) -> Result { let tenant_shard_id = command.get_tenant_shard_id(); tracing::info!( @@ -287,8 +290,7 @@ impl JobGenerator "Starting heatmap write on command"); let tenant = self .tenant_manager - .get_attached_tenant_shard(*tenant_shard_id) - .map_err(|e| anyhow::anyhow!(e))?; + .get_attached_tenant_shard(*tenant_shard_id)?; if !tenant.is_active() { return Err(GetTenantError::NotActive(*tenant_shard_id).into()); } diff --git a/pageserver/src/tenant/secondary/scheduler.rs b/pageserver/src/tenant/secondary/scheduler.rs index 28cf2125dfd0..e963c722b97a 100644 --- a/pageserver/src/tenant/secondary/scheduler.rs +++ b/pageserver/src/tenant/secondary/scheduler.rs @@ -12,7 +12,7 @@ use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use utils::{completion::Barrier, yielding_loop::yielding_loop}; -use super::{CommandRequest, CommandResponse}; +use super::{CommandRequest, CommandResponse, SecondaryTenantError}; /// Scheduling interval is the time between calls to JobGenerator::schedule. /// When we schedule jobs, the job generator may provide a hint of its preferred @@ -112,7 +112,7 @@ where /// Called when a command is received. A job will be spawned immediately if the return /// value is Some, ignoring concurrency limits and the pending queue. 
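    /// For example (illustrative): a `DownloadCommand::Download` for a shard that
    /// is not in secondary mode now comes back on the command's response channel as
    /// `Err(SecondaryTenantError::GetTenant(GetTenantError::ShardNotFound(..)))`
    /// rather than an opaque `anyhow` string.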
- fn on_command(&mut self, cmd: CMD) -> anyhow::Result; + fn on_command(&mut self, cmd: CMD) -> Result; } /// [`JobGenerator`] returns this to provide pending jobs, and hints about scheduling diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer.rs b/pageserver/src/tenant/storage_layer/inmemory_layer.rs index af6112d53550..71e53da20f7f 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs +++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs @@ -555,13 +555,12 @@ impl InMemoryLayer { timeline_id: TimelineId, tenant_shard_id: TenantShardId, start_lsn: Lsn, - gate_guard: utils::sync::gate::GateGuard, + gate: &utils::sync::gate::Gate, ctx: &RequestContext, ) -> Result { trace!("initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}"); - let file = - EphemeralFile::create(conf, tenant_shard_id, timeline_id, gate_guard, ctx).await?; + let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id, gate, ctx).await?; let key = InMemoryLayerFileId(file.page_cache_file_id()); Ok(InMemoryLayer { diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index a9f1189b4112..8933e8ceb13e 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -1149,6 +1149,7 @@ impl LayerInner { &self.desc.layer_name(), &self.metadata(), &self.path, + &timeline.gate, &timeline.cancel, ctx, ) diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 16dac10dca22..0118a5ce5f9d 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -471,14 +471,14 @@ async fn ingest_housekeeping_loop(tenant: Arc, cancel: CancellationToken // TODO: rename the background loop kind to something more generic, like, tenant housekeeping. // Or just spawn another background loop for this throttle, it's not like it's super costly. - info_span!(parent: None, "timeline_get_throttle", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| { + info_span!(parent: None, "pagestream_throttle", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| { let now = Instant::now(); let prev = std::mem::replace(&mut last_throttle_flag_reset_at, now); - let Stats { count_accounted_start, count_accounted_finish, count_throttled, sum_throttled_usecs} = tenant.timeline_get_throttle.reset_stats(); + let Stats { count_accounted_start, count_accounted_finish, count_throttled, sum_throttled_usecs} = tenant.pagestream_throttle.reset_stats(); if count_throttled == 0 { return; } - let allowed_rps = tenant.timeline_get_throttle.steady_rps(); + let allowed_rps = tenant.pagestream_throttle.steady_rps(); let delta = now - prev; info!( n_seconds=%format_args!("{:.3}", delta.as_secs_f64()), diff --git a/pageserver/src/tenant/throttle.rs b/pageserver/src/tenant/throttle.rs index 6a8095390177..54c0e59daaf6 100644 --- a/pageserver/src/tenant/throttle.rs +++ b/pageserver/src/tenant/throttle.rs @@ -1,19 +1,14 @@ use std::{ - str::FromStr, sync::{ atomic::{AtomicU64, Ordering}, - Arc, Mutex, + Arc, }, time::{Duration, Instant}, }; use arc_swap::ArcSwap; -use enumset::EnumSet; -use tracing::{error, warn}; use utils::leaky_bucket::{LeakyBucketConfig, RateLimiter}; -use crate::{context::RequestContext, task_mgr::TaskKind}; - /// Throttle for `async` functions. /// /// Runtime reconfigurable. 
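For context on the `SecondaryTenantError` change above: callers can now branch on the failure mode instead of string-matching an `anyhow` error. A minimal caller-side sketch (the `status_for` helper and the concrete status codes are hypothetical, not part of this diff):

// Hypothetical mapping from SecondaryTenantError to an HTTP status code.
fn status_for(err: &SecondaryTenantError) -> u16 {
    match err {
        // Tenant lookup failures map naturally to 404 Not Found.
        SecondaryTenantError::GetTenant(_) => 404,
        // Shutdown races are transient; 503 lets clients retry.
        SecondaryTenantError::ShuttingDown => 503,
    }
}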
@@ -35,7 +30,7 @@ pub struct Throttle { } pub struct Inner { - task_kinds: EnumSet, + enabled: bool, rate_limiter: Arc, } @@ -79,26 +74,12 @@ where } fn new_inner(config: Config) -> Inner { let Config { - task_kinds, + enabled, initial, refill_interval, refill_amount, max, } = config; - let task_kinds: EnumSet = task_kinds - .iter() - .filter_map(|s| match TaskKind::from_str(s) { - Ok(v) => Some(v), - Err(e) => { - // TODO: avoid this failure mode - error!( - "cannot parse task kind, ignoring for rate limiting {}", - utils::error::report_compact_sources(&e) - ); - None - } - }) - .collect(); // steady rate, we expect `refill_amount` requests per `refill_interval`. // dividing gives us the rps. @@ -112,7 +93,7 @@ where let rate_limiter = RateLimiter::with_initial_tokens(config, f64::from(initial_tokens)); Inner { - task_kinds, + enabled: enabled.is_enabled(), rate_limiter: Arc::new(rate_limiter), } } @@ -141,11 +122,13 @@ where self.inner.load().rate_limiter.steady_rps() } - pub async fn throttle(&self, ctx: &RequestContext, key_count: usize) -> Option { + pub async fn throttle(&self, key_count: usize) -> Option { let inner = self.inner.load_full(); // clones the `Inner` Arc - if !inner.task_kinds.contains(ctx.task_kind()) { + + if !inner.enabled { return None; - }; + } + let start = std::time::Instant::now(); self.metric.accounting_start(); @@ -162,19 +145,6 @@ where .fetch_add(wait_time.as_micros() as u64, Ordering::Relaxed); let observation = Observation { wait_time }; self.metric.observe_throttling(&observation); - match ctx.micros_spent_throttled.add(wait_time) { - Ok(res) => res, - Err(error) => { - use once_cell::sync::Lazy; - use utils::rate_limit::RateLimit; - static WARN_RATE_LIMIT: Lazy> = - Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10)))); - let mut guard = WARN_RATE_LIMIT.lock().unwrap(); - guard.call(move || { - warn!(error, "error adding time spent throttled; this message is logged at a global rate limit"); - }); - } - } Some(wait_time) } else { None diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 730477a7f4dc..8f1d5f6577a6 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -53,7 +53,7 @@ use utils::{ postgres_client::PostgresClientProtocol, sync::gate::{Gate, GateGuard}, }; -use wal_decoder::serialized_batch::SerializedValueBatch; +use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta}; use std::sync::atomic::Ordering as AtomicOrdering; use std::sync::{Arc, Mutex, RwLock, Weak}; @@ -208,8 +208,8 @@ fn drop_wlock(rlock: tokio::sync::RwLockWriteGuard<'_, T>) { /// The outward-facing resources required to build a Timeline pub struct TimelineResources { pub remote_client: RemoteTimelineClient, - pub timeline_get_throttle: - Arc>, + pub pagestream_throttle: + Arc>, pub l0_flush_global_state: l0_flush::L0FlushGlobalState, } @@ -411,9 +411,9 @@ pub struct Timeline { /// Timeline deletion will acquire both compaction and gc locks in whatever order. gc_lock: tokio::sync::Mutex<()>, - /// Cloned from [`super::Tenant::timeline_get_throttle`] on construction. - timeline_get_throttle: - Arc>, + /// Cloned from [`super::Tenant::pagestream_throttle`] on construction. 
+ pub(crate) pagestream_throttle: + Arc>, /// Size estimator for aux file v2 pub(crate) aux_file_size_estimator: AuxFileSizeEstimator, @@ -768,7 +768,7 @@ pub enum GetLogicalSizePriority { Background, } -#[derive(enumset::EnumSetType)] +#[derive(Debug, enumset::EnumSetType)] pub(crate) enum CompactFlags { ForceRepartition, ForceImageLayerCreation, @@ -777,6 +777,19 @@ pub(crate) enum CompactFlags { DryRun, } +#[serde_with::serde_as] +#[derive(Debug, Clone, serde::Deserialize)] +pub(crate) struct CompactRequest { + pub compact_range: Option, + pub compact_below_lsn: Option, + /// Whether the compaction job should be scheduled. + #[serde(default)] + pub scheduled: bool, + /// Whether the compaction job should be split across key ranges. + #[serde(default)] + pub sub_compaction: bool, +} + #[serde_with::serde_as] #[derive(Debug, Clone, serde::Deserialize)] pub(crate) struct CompactRange { @@ -786,10 +799,27 @@ pub(crate) struct CompactRange { pub end: Key, } -#[derive(Clone, Default)] +impl From> for CompactRange { + fn from(range: Range) -> Self { + CompactRange { + start: range.start, + end: range.end, + } + } +} + +#[derive(Debug, Clone, Default)] pub(crate) struct CompactOptions { pub flags: EnumSet, + /// If set, the compaction will only compact the key range specified by this option. + /// This option is only used by GC compaction. pub compact_range: Option, + /// If set, the compaction will only compact the LSN below this value. + /// This option is only used by GC compaction. + pub compact_below_lsn: Option, + /// Enable sub-compaction (split compaction job across key ranges). + /// This option is only used by GC compaction. + pub sub_compaction: bool, } impl std::fmt::Debug for Timeline { @@ -949,7 +979,7 @@ impl Timeline { /// If a remote layer file is needed, it is downloaded as part of this /// call. /// - /// This method enforces [`Self::timeline_get_throttle`] internally. + /// This method enforces [`Self::pagestream_throttle`] internally. /// /// NOTE: It is considered an error to 'get' a key that doesn't exist. The /// abstraction above this needs to store suitable metadata to track what @@ -977,8 +1007,6 @@ impl Timeline { // page_service. 
debug_assert!(!self.shard_identity.is_key_disposable(&key)); - self.timeline_get_throttle.throttle(ctx, 1).await; - let keyspace = KeySpace { ranges: vec![key..key.next()], }; @@ -1058,13 +1086,6 @@ impl Timeline { .for_task_kind(ctx.task_kind()) .map(|metric| (metric, Instant::now())); - // start counting after throttle so that throttle time - // is always less than observation time - let throttled = self - .timeline_get_throttle - .throttle(ctx, key_count as usize) - .await; - let res = self .get_vectored_impl( keyspace.clone(), @@ -1076,23 +1097,7 @@ impl Timeline { if let Some((metric, start)) = start { let elapsed = start.elapsed(); - let ex_throttled = if let Some(throttled) = throttled { - elapsed.checked_sub(throttled) - } else { - Some(elapsed) - }; - - if let Some(ex_throttled) = ex_throttled { - metric.observe(ex_throttled.as_secs_f64()); - } else { - use utils::rate_limit::RateLimit; - static LOGGED: Lazy> = - Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10)))); - let mut rate_limit = LOGGED.lock().unwrap(); - rate_limit.call(|| { - warn!("error deducting time spent throttled; this message is logged at a global rate limit"); - }); - } + metric.observe(elapsed.as_secs_f64()); } res @@ -1137,14 +1142,6 @@ impl Timeline { .for_task_kind(ctx.task_kind()) .map(ScanLatencyOngoingRecording::start_recording); - // start counting after throttle so that throttle time - // is always less than observation time - let throttled = self - .timeline_get_throttle - // assume scan = 1 quota for now until we find a better way to process this - .throttle(ctx, 1) - .await; - let vectored_res = self .get_vectored_impl( keyspace.clone(), @@ -1155,7 +1152,7 @@ impl Timeline { .await; if let Some(recording) = start { - recording.observe(throttled); + recording.observe(); } vectored_res @@ -1466,23 +1463,31 @@ impl Timeline { Ok(lease) } - /// Flush to disk all data that was written with the put_* functions + /// Freeze the current open in-memory layer. It will be written to disk on next iteration. + /// Returns the flush request ID which can be awaited with wait_flush_completion(). + #[instrument(skip(self), fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id))] + pub(crate) async fn freeze(&self) -> Result { + self.freeze0().await + } + + /// Freeze and flush the open in-memory layer, waiting for it to be written to disk. #[instrument(skip(self), fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id))] pub(crate) async fn freeze_and_flush(&self) -> Result<(), FlushLayerError> { self.freeze_and_flush0().await } + /// Freeze the current open in-memory layer. It will be written to disk on next iteration. + /// Returns the flush request ID which can be awaited with wait_flush_completion(). + pub(crate) async fn freeze0(&self) -> Result { + let mut g = self.write_lock.lock().await; + let to_lsn = self.get_last_record_lsn(); + self.freeze_inmem_layer_at(to_lsn, &mut g).await + } + // This exists to provide a non-span creating version of `freeze_and_flush` we can call without // polluting the span hierarchy. pub(crate) async fn freeze_and_flush0(&self) -> Result<(), FlushLayerError> { - let token = { - // Freeze the current open in-memory layer. It will be written to disk on next - // iteration. - let mut g = self.write_lock.lock().await; - - let to_lsn = self.get_last_record_lsn(); - self.freeze_inmem_layer_at(to_lsn, &mut g).await? 
- }; + let token = self.freeze0().await?; self.wait_flush_completion(token).await } @@ -1637,6 +1642,8 @@ impl Timeline { CompactOptions { flags, compact_range: None, + compact_below_lsn: None, + sub_compaction: false, }, ctx, ) @@ -2371,7 +2378,7 @@ impl Timeline { standby_horizon: AtomicLsn::new(0), - timeline_get_throttle: resources.timeline_get_throttle, + pagestream_throttle: resources.pagestream_throttle, aux_file_size_estimator: AuxFileSizeEstimator::new(aux_file_metrics), @@ -2392,7 +2399,7 @@ impl Timeline { result .metrics - .last_record_gauge + .last_record_lsn_gauge .set(disk_consistent_lsn.0 as i64); result }) @@ -3488,7 +3495,6 @@ impl Timeline { ctx: &RequestContext, ) -> anyhow::Result> { let mut guard = self.layers.write().await; - let gate_guard = self.gate.enter().context("enter gate for inmem layer")?; let last_record_lsn = self.get_last_record_lsn(); ensure!( @@ -3505,7 +3511,7 @@ impl Timeline { self.conf, self.timeline_id, self.tenant_shard_id, - gate_guard, + &self.gate, ctx, ) .await?; @@ -3515,7 +3521,7 @@ impl Timeline { pub(crate) fn finish_write(&self, new_lsn: Lsn) { assert!(new_lsn.is_aligned()); - self.metrics.last_record_gauge.set(new_lsn.0 as i64); + self.metrics.last_record_lsn_gauge.set(new_lsn.0 as i64); self.last_record_lsn.advance(new_lsn); } @@ -3883,6 +3889,10 @@ impl Timeline { fn set_disk_consistent_lsn(&self, new_value: Lsn) -> bool { let old_value = self.disk_consistent_lsn.fetch_max(new_value); assert!(new_value >= old_value, "disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}"); + + self.metrics + .disk_consistent_lsn_gauge + .set(new_value.0 as i64); new_value != old_value } @@ -5921,6 +5931,23 @@ impl<'a> TimelineWriter<'a> { return Ok(()); } + // In debug builds, assert that we don't write any keys that don't belong to this shard. + // We don't assert this in release builds, since key ownership policies may change over + // time. Stray keys will be removed during compaction. 
+ if cfg!(debug_assertions) { + for metadata in &batch.metadata { + if let ValueMeta::Serialized(metadata) = metadata { + let key = Key::from_compact(metadata.key); + assert!( + self.shard_identity.is_key_local(&key) + || self.shard_identity.is_key_global(&key), + "key {key} does not belong on shard {}", + self.shard_identity.shard_index() + ); + } + } + } + let batch_max_lsn = batch.max_lsn; let buf_size: u64 = batch.buffer_size() as u64; diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index ecd68ba55ec4..a18e157d37b4 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -10,13 +10,12 @@ use std::sync::Arc; use super::layer_manager::LayerManager; use super::{ - CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode, - RecordedDuration, Timeline, + CompactFlags, CompactOptions, CompactRange, CreateImageLayersError, DurationRecorder, + ImageLayerCreationMode, RecordedDuration, Timeline, }; use anyhow::{anyhow, bail, Context}; use bytes::Bytes; -use enumset::EnumSet; use fail::fail_point; use itertools::Itertools; use pageserver_api::key::KEY_SIZE; @@ -30,7 +29,6 @@ use utils::id::TimelineId; use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder}; use crate::page_cache; use crate::statvfs::Statvfs; -use crate::tenant::checks::check_valid_layermap; use crate::tenant::remote_timeline_client::WaitCompletionError; use crate::tenant::storage_layer::batch_split_writer::{ BatchWriterResult, SplitDeltaLayerWriter, SplitImageLayerWriter, @@ -64,6 +62,12 @@ use super::CompactionError; /// Maximum number of deltas before generating an image layer in bottom-most compaction. const COMPACTION_DELTA_THRESHOLD: usize = 5; +/// A scheduled compaction task. +pub struct ScheduledCompactionTask { + pub options: CompactOptions, + pub result_tx: Option>, +} + pub struct GcCompactionJobDescription { /// All layers to read in the compaction job selected_layers: Vec, @@ -1174,11 +1178,12 @@ impl Timeline { .await .map_err(CompactionError::Other)?; } else { - debug!( - "Dropping key {} during compaction (it belongs on shard {:?})", - key, - self.shard_identity.get_shard_number(&key) - ); + let shard = self.shard_identity.shard_index(); + let owner = self.shard_identity.get_shard_number(&key); + if cfg!(debug_assertions) { + panic!("key {key} does not belong on shard {shard}, owned by {owner}"); + } + debug!("dropping key {key} during compaction (it belongs on shard {owner})"); } if !new_layers.is_empty() { @@ -1746,22 +1751,114 @@ impl Timeline { Ok(()) } - pub(crate) async fn compact_with_gc( + /// Split a gc-compaction job into multiple compaction jobs. Optimally, this function should return a vector of + /// `GcCompactionJobDesc`. But we want to keep it simple on the tenant scheduling side without exposing too much + /// ad-hoc information about gc compaction itself. 
+ pub(crate) async fn gc_compaction_split_jobs( self: &Arc<Self>, - cancel: &CancellationToken, options: CompactOptions, - ctx: &RequestContext, - ) -> anyhow::Result<()> { - self.partial_compact_with_gc( - options - .compact_range - .map(|range| range.start..range.end) - .unwrap_or_else(|| Key::MIN..Key::MAX), - cancel, - options.flags, - ctx, - ) - .await + ) -> anyhow::Result<Vec<CompactOptions>> { + if !options.sub_compaction { + return Ok(vec![options]); + } + let compact_range = options.compact_range.clone().unwrap_or(CompactRange { + start: Key::MIN, + end: Key::MAX, + }); + let compact_below_lsn = if let Some(compact_below_lsn) = options.compact_below_lsn { + compact_below_lsn + } else { + let gc_info = self.gc_info.read().unwrap(); + gc_info.cutoffs.select_min() // use the real gc cutoff + }; + let mut compact_jobs = Vec::new(); + // For now, we simply use the key partitioning information; we should do a more fine-grained partitioning + // by estimating the number of files read for a compaction job. We should also partition on LSN. + let Ok(partition) = self.partitioning.try_lock() else { + bail!("failed to acquire partition lock"); + }; + let ((dense_ks, sparse_ks), _) = &*partition; + // Truncate the key range to be within the user-specified compaction range. + fn truncate_to( + source_start: &Key, + source_end: &Key, + target_start: &Key, + target_end: &Key, + ) -> Option<(Key, Key)> { + let start = source_start.max(target_start); + let end = source_end.min(target_end); + if start < end { + Some((*start, *end)) + } else { + None + } + } + let mut split_key_ranges = Vec::new(); + let ranges = dense_ks + .parts + .iter() + .map(|partition| partition.ranges.iter()) + .chain(sparse_ks.parts.iter().map(|x| x.0.ranges.iter())) + .flatten() + .cloned() + .collect_vec(); + for range in ranges.iter() { + let Some((start, end)) = truncate_to( + &range.start, + &range.end, + &compact_range.start, + &compact_range.end, + ) else { + continue; + }; + split_key_ranges.push((start, end)); + } + split_key_ranges.sort(); + let guard = self.layers.read().await; + let layer_map = guard.layer_map()?; + let mut current_start = None; + // Split the compaction job into pieces of about 4GB each + const GC_COMPACT_MAX_SIZE_MB: u64 = 4 * 1024; // 4GB, TODO: should be configuration in the future + let ranges_num = split_key_ranges.len(); + for (idx, (start, end)) in split_key_ranges.into_iter().enumerate() { + if current_start.is_none() { + current_start = Some(start); + } + let start = current_start.unwrap(); + if start >= end { + // We have already processed this partition. + continue; + } + let res = layer_map.range_search(start..end, compact_below_lsn); + let total_size = res.found.keys().map(|x| x.layer.file_size()).sum::<u64>(); + if total_size > GC_COMPACT_MAX_SIZE_MB * 1024 * 1024 || ranges_num == idx + 1 { + let mut compact_options = options.clone(); + // Try to extend the compaction range so that we include at least one full layer file. + let extended_end = res + .found + .keys() + .map(|layer| layer.layer.key_range.end) + .min(); + // It is possible that the search range does not contain any layer files when we reach the end of the loop. + // In this case, we simply use the specified key range end.
+ let end = if let Some(extended_end) = extended_end { + extended_end.max(end) + } else { + end + }; + info!( + "splitting compaction job: {}..{}, estimated_size={}", + start, end, total_size + ); + compact_options.compact_range = Some(CompactRange { start, end }); + compact_options.compact_below_lsn = Some(compact_below_lsn); + compact_options.sub_compaction = false; + compact_jobs.push(compact_options); + current_start = Some(end); + } + } + drop(guard); + Ok(compact_jobs) } /// An experimental compaction building block that combines compaction with garbage collection. @@ -1771,19 +1868,51 @@ impl Timeline { /// layers and image layers, which generates image layers on the gc horizon, drop deltas below gc horizon, /// and create delta layers with all deltas >= gc horizon. /// - /// If `key_range` is provided, it will only compact the keys within the range, aka partial compaction. + /// If `options.compact_range` is provided, it will only compact the keys within the range, aka partial compaction. /// Partial compaction will read and process all layers overlapping with the key range, even if it might /// contain extra keys. After the gc-compaction phase completes, delta layers that are not fully contained /// within the key range will be rewritten to ensure they do not overlap with the delta layers. Providing /// Key::MIN..Key..MAX to the function indicates a full compaction, though technically, `Key::MAX` is not /// part of the range. - pub(crate) async fn partial_compact_with_gc( + /// + /// If `options.compact_below_lsn` is provided, the compaction will only compact layers below or intersect with + /// the LSN. Otherwise, it will use the gc cutoff by default. + pub(crate) async fn compact_with_gc( + self: &Arc, + cancel: &CancellationToken, + options: CompactOptions, + ctx: &RequestContext, + ) -> anyhow::Result<()> { + if options.sub_compaction { + info!("running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"); + let jobs = self.gc_compaction_split_jobs(options).await?; + let jobs_len = jobs.len(); + for (idx, job) in jobs.into_iter().enumerate() { + info!( + "running enhanced gc bottom-most compaction, sub-compaction {}/{}", + idx + 1, + jobs_len + ); + self.compact_with_gc_inner(cancel, job, ctx).await?; + } + if jobs_len == 0 { + info!("no jobs to run, skipping gc bottom-most compaction"); + } + return Ok(()); + } + self.compact_with_gc_inner(cancel, options, ctx).await + } + + async fn compact_with_gc_inner( self: &Arc, - compaction_key_range: Range, cancel: &CancellationToken, - flags: EnumSet, + options: CompactOptions, ctx: &RequestContext, ) -> anyhow::Result<()> { + assert!( + !options.sub_compaction, + "sub-compaction should be handled by the outer function" + ); // Block other compaction/GC tasks from running for now. GC-compaction could run along // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc. // Note that we already acquired the compaction lock when the outer `compact` function gets called. 
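To make the job-splitting logic above concrete, here is a small self-contained sketch of the intersect-then-accumulate idea, using `u64` bounds in place of `Key` (illustrative only; the real code sizes jobs via `layer_map.range_search` and extends job ends to full layer boundaries):

// Illustrative only: intersect a partition range with the requested
// compaction range, mirroring the `truncate_to` helper above.
fn truncate_to(src: (u64, u64), tgt: (u64, u64)) -> Option<(u64, u64)> {
    let start = src.0.max(tgt.0);
    let end = src.1.min(tgt.1);
    (start < end).then_some((start, end))
}

fn main() {
    assert_eq!(truncate_to((0, 10), (5, 20)), Some((5, 10))); // overlap is kept
    assert_eq!(truncate_to((0, 5), (5, 20)), None); // disjoint ranges drop out
}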
@@ -1803,6 +1932,12 @@ impl Timeline { ) .await?; + let flags = options.flags; + let compaction_key_range = options + .compact_range + .map(|range| range.start..range.end) + .unwrap_or_else(|| Key::MIN..Key::MAX); + let dry_run = flags.contains(CompactFlags::DryRun); if compaction_key_range == (Key::MIN..Key::MAX) { @@ -1826,7 +1961,18 @@ impl Timeline { let layers = guard.layer_map()?; let gc_info = self.gc_info.read().unwrap(); let mut retain_lsns_below_horizon = Vec::new(); - let gc_cutoff = gc_info.cutoffs.select_min(); + let gc_cutoff = { + let real_gc_cutoff = gc_info.cutoffs.select_min(); + // The compaction algorithm will keep all keys above the gc_cutoff while keeping only necessary keys below the gc_cutoff for + // each of the retain_lsn. Therefore, if the user-provided `compact_below_lsn` is larger than the real gc cutoff, we will use + // the real cutoff. + let mut gc_cutoff = options.compact_below_lsn.unwrap_or(real_gc_cutoff); + if gc_cutoff > real_gc_cutoff { + warn!("provided compact_below_lsn={} is larger than the real_gc_cutoff={}, using the real gc cutoff", gc_cutoff, real_gc_cutoff); + gc_cutoff = real_gc_cutoff; + } + gc_cutoff + }; for (lsn, _timeline_id, _is_offloaded) in &gc_info.retain_lsns { if lsn < &gc_cutoff { retain_lsns_below_horizon.push(*lsn); @@ -1846,7 +1992,7 @@ impl Timeline { .map(|desc| desc.get_lsn_range().end) .max() else { - info!("no layers to compact with gc"); + info!("no layers to compact with gc: no historic layers below gc_cutoff, gc_cutoff={}", gc_cutoff); return Ok(()); }; // Then, pick all the layers that are below the max_layer_lsn. This is to ensure we can pick all single-key @@ -1869,7 +2015,7 @@ impl Timeline { } } if selected_layers.is_empty() { - info!("no layers to compact with gc"); + info!("no layers to compact with gc: no layers within the key range, gc_cutoff={}, key_range={}..{}", gc_cutoff, compaction_key_range.start, compaction_key_range.end); return Ok(()); } retain_lsns_below_horizon.sort(); @@ -1936,14 +2082,15 @@ impl Timeline { // Step 1: construct a k-merge iterator over all layers. // Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point. - let layer_names = job_desc - .selected_layers - .iter() - .map(|layer| layer.layer_desc().layer_name()) - .collect_vec(); - if let Some(err) = check_valid_layermap(&layer_names) { - warn!("gc-compaction layer map check failed because {}, this is normal if partial compaction is not finished yet", err); - } + // disable the check for now because we need to adjust the check for partial compactions, will enable later. + // let layer_names = job_desc + // .selected_layers + // .iter() + // .map(|layer| layer.layer_desc().layer_name()) + // .collect_vec(); + // if let Some(err) = check_valid_layermap(&layer_names) { + // warn!("gc-compaction layer map check failed because {}, this is normal if partial compaction is not finished yet", err); + // } // The maximum LSN we are processing in this compaction loop let end_lsn = job_desc .selected_layers @@ -2048,6 +2195,11 @@ impl Timeline { // This is not handled in the filter iterator because shard is determined by hash. // Therefore, it does not give us any performance benefit to do things like skip // a whole layer file as handling key spaces (ranges). 
+ if cfg!(debug_assertions) { + let shard = self.shard_identity.shard_index(); + let owner = self.shard_identity.get_shard_number(&key); + panic!("key {key} does not belong on shard {shard}, owned by {owner}"); + } continue; } if !job_desc.compaction_key_range.contains(&key) { diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index 67fc710c44ee..47a93b19d270 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -298,7 +298,7 @@ impl DeleteTimelineFlow { None, // Ancestor is not needed for deletion. TimelineResources { remote_client, - timeline_get_throttle: tenant.timeline_get_throttle.clone(), + pagestream_throttle: tenant.pagestream_throttle.clone(), l0_flush_global_state: tenant.l0_flush_global_state.clone(), }, // Important. We dont pass ancestor above because it can be missing. diff --git a/pageserver/src/tenant/timeline/import_pgdata/flow.rs b/pageserver/src/tenant/timeline/import_pgdata/flow.rs index cbd4168c06e5..43880726063a 100644 --- a/pageserver/src/tenant/timeline/import_pgdata/flow.rs +++ b/pageserver/src/tenant/timeline/import_pgdata/flow.rs @@ -129,22 +129,23 @@ impl Flow { } // Import SLRUs - - // pg_xact (01:00 keyspace) - self.import_slru(SlruKind::Clog, &self.storage.pgdata().join("pg_xact")) + if self.timeline.tenant_shard_id.is_shard_zero() { + // pg_xact (01:00 keyspace) + self.import_slru(SlruKind::Clog, &self.storage.pgdata().join("pg_xact")) + .await?; + // pg_multixact/members (01:01 keyspace) + self.import_slru( + SlruKind::MultiXactMembers, + &self.storage.pgdata().join("pg_multixact/members"), + ) .await?; - // pg_multixact/members (01:01 keyspace) - self.import_slru( - SlruKind::MultiXactMembers, - &self.storage.pgdata().join("pg_multixact/members"), - ) - .await?; - // pg_multixact/offsets (01:02 keyspace) - self.import_slru( - SlruKind::MultiXactOffsets, - &self.storage.pgdata().join("pg_multixact/offsets"), - ) - .await?; + // pg_multixact/offsets (01:02 keyspace) + self.import_slru( + SlruKind::MultiXactOffsets, + &self.storage.pgdata().join("pg_multixact/offsets"), + ) + .await?; + } // Import pg_twophase. 
// TODO: as empty @@ -302,6 +303,8 @@ impl Flow { } async fn import_slru(&mut self, kind: SlruKind, path: &RemotePath) -> anyhow::Result<()> { + assert!(self.timeline.tenant_shard_id.is_shard_zero()); + let segments = self.storage.listfilesindir(path).await?; let segments: Vec<(String, u32, usize)> = segments .into_iter() @@ -337,7 +340,6 @@ impl Flow { debug!(%p, segno=%segno, %size, %start_key, %end_key, "scheduling SLRU segment"); self.tasks .push(AnyImportTask::SlruBlocks(ImportSlruBlocksTask::new( - *self.timeline.get_shard_identity(), start_key..end_key, &p, self.storage.clone(), @@ -631,21 +633,14 @@ impl ImportTask for ImportRelBlocksTask { } struct ImportSlruBlocksTask { - shard_identity: ShardIdentity, key_range: Range, path: RemotePath, storage: RemoteStorageWrapper, } impl ImportSlruBlocksTask { - fn new( - shard_identity: ShardIdentity, - key_range: Range, - path: &RemotePath, - storage: RemoteStorageWrapper, - ) -> Self { + fn new(key_range: Range, path: &RemotePath, storage: RemoteStorageWrapper) -> Self { ImportSlruBlocksTask { - shard_identity, key_range, path: path.clone(), storage, @@ -673,17 +668,13 @@ impl ImportTask for ImportSlruBlocksTask { let mut file_offset = 0; while blknum < end_blk { let key = slru_block_to_key(kind, segno, blknum); - assert!( - !self.shard_identity.is_key_disposable(&key), - "SLRU keys need to go into every shard" - ); let buf = &buf[file_offset..(file_offset + 8192)]; file_offset += 8192; layer_writer .put_image(key, Bytes::copy_from_slice(buf), ctx) .await?; - blknum += 1; nimages += 1; + blknum += 1; } Ok(nimages) } diff --git a/pageserver/src/tenant/timeline/layer_manager.rs b/pageserver/src/tenant/timeline/layer_manager.rs index 4293a44dca25..3888e7f86a9b 100644 --- a/pageserver/src/tenant/timeline/layer_manager.rs +++ b/pageserver/src/tenant/timeline/layer_manager.rs @@ -182,7 +182,7 @@ impl OpenLayerManager { conf: &'static PageServerConf, timeline_id: TimelineId, tenant_shard_id: TenantShardId, - gate_guard: utils::sync::gate::GateGuard, + gate: &utils::sync::gate::Gate, ctx: &RequestContext, ) -> anyhow::Result> { ensure!(lsn.is_aligned()); @@ -212,15 +212,9 @@ impl OpenLayerManager { lsn ); - let new_layer = InMemoryLayer::create( - conf, - timeline_id, - tenant_shard_id, - start_lsn, - gate_guard, - ctx, - ) - .await?; + let new_layer = + InMemoryLayer::create(conf, timeline_id, tenant_shard_id, start_lsn, gate, ctx) + .await?; let layer = Arc::new(new_layer); self.layer_map.open_layer = Some(layer.clone()); diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index d90ffbfa2c11..3f10eeda60a9 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -369,6 +369,13 @@ pub(super) async fn handle_walreceiver_connection( // advances it to its end LSN. 0 is just an initialization placeholder. let mut modification = timeline.begin_modification(Lsn(0)); + if !records.is_empty() { + timeline + .metrics + .wal_records_received + .inc_by(records.len() as u64); + } + for interpreted in records { if matches!(interpreted.flush_uncommitted, FlushUncommittedRecords::Yes) && uncommitted_records > 0 @@ -510,6 +517,7 @@ pub(super) async fn handle_walreceiver_connection( } // Ingest the records without immediately committing them. 
+ timeline.metrics.wal_records_received.inc(); let ingested = walingest .ingest_record(interpreted, &mut modification, &ctx) .await diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index b9f8c7ea2024..8a7f4a4bf5fd 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -20,7 +20,7 @@ use camino::{Utf8Path, Utf8PathBuf}; use once_cell::sync::OnceCell; use owned_buffers_io::aligned_buffer::buffer::AlignedBuffer; use owned_buffers_io::aligned_buffer::{AlignedBufferMut, AlignedSlice, ConstAlign}; -use owned_buffers_io::io_buf_aligned::IoBufAlignedMut; +use owned_buffers_io::io_buf_aligned::{IoBufAligned, IoBufAlignedMut}; use owned_buffers_io::io_buf_ext::FullSlice; use pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT; use pageserver_api::shard::TenantShardId; @@ -63,9 +63,6 @@ pub(crate) mod owned_buffers_io { pub(crate) mod io_buf_ext; pub(crate) mod slice; pub(crate) mod write; - pub(crate) mod util { - pub(crate) mod size_tracking_writer; - } } #[derive(Debug)] @@ -221,7 +218,7 @@ impl VirtualFile { self.inner.read_exact_at_page(page, offset, ctx).await } - pub async fn write_all_at( + pub async fn write_all_at( &self, buf: FullSlice, offset: u64, @@ -1325,14 +1322,14 @@ impl Drop for VirtualFileInner { } impl OwnedAsyncWriter for VirtualFile { - #[inline(always)] - async fn write_all( - &mut self, + async fn write_all_at( + &self, buf: FullSlice, + offset: u64, ctx: &RequestContext, - ) -> std::io::Result<(usize, FullSlice)> { - let (buf, res) = VirtualFile::write_all(self, buf, ctx).await; - res.map(move |v| (v, buf)) + ) -> std::io::Result> { + let (buf, res) = VirtualFile::write_all_at(self, buf, offset, ctx).await; + res.map(|_| buf) } } @@ -1451,7 +1448,7 @@ mod tests { } } } - async fn write_all_at( + async fn write_all_at( &self, buf: FullSlice, offset: u64, @@ -1594,6 +1591,7 @@ mod tests { &ctx, ) .await?; + file_a .write_all(b"foobar".to_vec().slice_len(), &ctx) .await?; @@ -1652,10 +1650,10 @@ mod tests { ) .await?; file_b - .write_all_at(b"BAR".to_vec().slice_len(), 3, &ctx) + .write_all_at(IoBuffer::from(b"BAR").slice_len(), 3, &ctx) .await?; file_b - .write_all_at(b"FOO".to_vec().slice_len(), 0, &ctx) + .write_all_at(IoBuffer::from(b"FOO").slice_len(), 0, &ctx) .await?; assert_eq!(file_b.read_string_at(2, 3, &ctx).await?, "OBA"); diff --git a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/alignment.rs b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/alignment.rs index 933b78a13b70..6b9992643f2a 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/alignment.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/alignment.rs @@ -4,7 +4,7 @@ pub trait Alignment: std::marker::Unpin + 'static { } /// Alignment at compile time. -#[derive(Debug)] +#[derive(Debug, Clone, Copy)] pub struct ConstAlign; impl Alignment for ConstAlign { @@ -14,7 +14,7 @@ impl Alignment for ConstAlign { } /// Alignment at run time. 
-#[derive(Debug)] +#[derive(Debug, Clone, Copy)] pub struct RuntimeAlign { align: usize, } diff --git a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs index 2fba6d699b28..a5c26cd7463a 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs @@ -3,9 +3,10 @@ use std::{ sync::Arc, }; -use super::{alignment::Alignment, raw::RawAlignedBuffer}; +use super::{alignment::Alignment, raw::RawAlignedBuffer, AlignedBufferMut, ConstAlign}; /// A shared, immutable aligned buffer type. +#[derive(Clone, Debug)] pub struct AlignedBuffer<A: Alignment> { /// Shared raw buffer. raw: Arc<RawAlignedBuffer<A>>, @@ -86,6 +87,13 @@ impl<A: Alignment> AlignedBuffer<A> { range: begin..end, } } + + /// Returns the mutable aligned buffer, if the immutable aligned buffer + /// has exactly one strong reference. Otherwise returns `None`. + pub fn into_mut(self) -> Option<AlignedBufferMut<A>> { + let raw = Arc::into_inner(self.raw)?; + Some(AlignedBufferMut::from_raw(raw)) + } } impl<A: Alignment> Deref for AlignedBuffer<A> { @@ -108,6 +116,14 @@ impl<A: Alignment> PartialEq<[u8]> for AlignedBuffer<A> { } } +impl<const N: usize, const A: usize> From<&[u8; N]> for AlignedBuffer<ConstAlign<A>> { + fn from(value: &[u8; N]) -> Self { + let mut buf = AlignedBufferMut::with_capacity(N); + buf.extend_from_slice(value); + buf.freeze() + } +} + /// SAFETY: the underlying buffer references a stable memory region. unsafe impl<A: Alignment> tokio_epoll_uring::IoBuf for AlignedBuffer<A> { fn stable_ptr(&self) -> *const u8 { diff --git a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs index b3675d1aeabb..d2f5e206bb09 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs @@ -1,4 +1,7 @@ -use std::ops::{Deref, DerefMut}; +use std::{ + mem::MaybeUninit, + ops::{Deref, DerefMut}, +}; use super::{ alignment::{Alignment, ConstAlign}, @@ -46,6 +49,11 @@ impl<const A: usize> AlignedBufferMut<ConstAlign<A>> { } impl<A: Alignment> AlignedBufferMut<A> { + /// Constructs a mutable aligned buffer from raw. + pub(super) fn from_raw(raw: RawAlignedBuffer<A>) -> Self { + AlignedBufferMut { raw } + } + /// Returns the total number of bytes the buffer can hold. #[inline] pub fn capacity(&self) -> usize { @@ -128,6 +136,39 @@ impl<A: Alignment> AlignedBufferMut<A> { let len = self.len(); AlignedBuffer::from_raw(self.raw, 0..len) } + + /// Clones and appends all elements in a slice to the buffer. Reserves additional capacity as needed. + #[inline] + pub fn extend_from_slice(&mut self, extend: &[u8]) { + let cnt = extend.len(); + self.reserve(cnt); + + // SAFETY: we already reserved additional `cnt` bytes, safe to perform memcpy. + unsafe { + let dst = self.spare_capacity_mut(); + // Reserved above + debug_assert!(dst.len() >= cnt); + + core::ptr::copy_nonoverlapping(extend.as_ptr(), dst.as_mut_ptr().cast(), cnt); + } + // SAFETY: We do have at least `cnt` bytes remaining before advance. + unsafe { + bytes::BufMut::advance_mut(self, cnt); + } + } + + /// Returns the remaining spare capacity of the vector as a slice of `MaybeUninit<u8>`. + #[inline] + fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit<u8>] { + // SAFETY: we guarantee that the `Self::capacity()` bytes from + // `Self::as_mut_ptr()` are allocated.
+ unsafe { + let ptr = self.as_mut_ptr().add(self.len()); + let len = self.capacity() - self.len(); + + core::slice::from_raw_parts_mut(ptr.cast(), len) + } + } } impl Deref for AlignedBufferMut { diff --git a/pageserver/src/virtual_file/owned_buffers_io/io_buf_aligned.rs b/pageserver/src/virtual_file/owned_buffers_io/io_buf_aligned.rs index dba695196ebb..4ea6b1774447 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/io_buf_aligned.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/io_buf_aligned.rs @@ -1,9 +1,15 @@ -use tokio_epoll_uring::IoBufMut; +use tokio_epoll_uring::{IoBuf, IoBufMut}; -use crate::virtual_file::{IoBufferMut, PageWriteGuardBuf}; +use crate::virtual_file::{IoBuffer, IoBufferMut, PageWriteGuardBuf}; +/// A marker trait for a mutable aligned buffer type. pub trait IoBufAlignedMut: IoBufMut {} +/// A marker trait for an aligned buffer type. +pub trait IoBufAligned: IoBuf {} + impl IoBufAlignedMut for IoBufferMut {} +impl IoBufAligned for IoBuffer {} + impl IoBufAlignedMut for PageWriteGuardBuf {} diff --git a/pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs b/pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs index c3940cf6cea2..525f447b6dac 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs @@ -5,6 +5,8 @@ use bytes::{Bytes, BytesMut}; use std::ops::{Deref, Range}; use tokio_epoll_uring::{BoundedBuf, IoBuf, Slice}; +use super::write::CheapCloneForRead; + /// The true owned equivalent for Rust [`slice`]. Use this for the write path. /// /// Unlike [`tokio_epoll_uring::Slice`], which we unfortunately inherited from `tokio-uring`, @@ -43,6 +45,18 @@ where } } +impl CheapCloneForRead for FullSlice +where + B: IoBuf + CheapCloneForRead, +{ + fn cheap_clone(&self) -> Self { + let bounds = self.slice.bounds(); + let clone = self.slice.get_ref().cheap_clone(); + let slice = clone.slice(bounds); + Self { slice } + } +} + pub(crate) trait IoBufExt { /// Get a [`FullSlice`] for the entire buffer, i.e., `self[..]` or `self[0..self.len()]`. fn slice_len(self) -> FullSlice diff --git a/pageserver/src/virtual_file/owned_buffers_io/util/size_tracking_writer.rs b/pageserver/src/virtual_file/owned_buffers_io/util/size_tracking_writer.rs deleted file mode 100644 index efcb61ba6532..000000000000 --- a/pageserver/src/virtual_file/owned_buffers_io/util/size_tracking_writer.rs +++ /dev/null @@ -1,50 +0,0 @@ -use crate::{ - context::RequestContext, - virtual_file::owned_buffers_io::{io_buf_ext::FullSlice, write::OwnedAsyncWriter}, -}; -use tokio_epoll_uring::IoBuf; - -pub struct Writer { - dst: W, - bytes_amount: u64, -} - -impl Writer { - pub fn new(dst: W) -> Self { - Self { - dst, - bytes_amount: 0, - } - } - - pub fn bytes_written(&self) -> u64 { - self.bytes_amount - } - - pub fn as_inner(&self) -> &W { - &self.dst - } - - /// Returns the wrapped `VirtualFile` object as well as the number - /// of bytes that were written to it through this object. 
- #[cfg_attr(target_os = "macos", allow(dead_code))] - pub fn into_inner(self) -> (u64, W) { - (self.bytes_amount, self.dst) - } -} - -impl<W> OwnedAsyncWriter for Writer<W> -where - W: OwnedAsyncWriter, -{ - #[inline(always)] - async fn write_all<Buf: IoBuf + Send>( - &mut self, - buf: FullSlice<Buf>, - ctx: &RequestContext, - ) -> std::io::Result<(usize, FullSlice<Buf>)> { - let (nwritten, buf) = self.dst.write_all(buf, ctx).await?; - self.bytes_amount += u64::try_from(nwritten).unwrap(); - Ok((nwritten, buf)) - } -} diff --git a/pageserver/src/virtual_file/owned_buffers_io/write.rs b/pageserver/src/virtual_file/owned_buffers_io/write.rs index 568cf62e5617..7299d8370301 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/write.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/write.rs @@ -1,55 +1,88 @@ -use bytes::BytesMut; +mod flush; +use std::sync::Arc; + +use flush::FlushHandle; use tokio_epoll_uring::IoBuf; -use crate::context::RequestContext; +use crate::{ + context::RequestContext, + virtual_file::{IoBuffer, IoBufferMut}, +}; + +use super::{ + io_buf_aligned::IoBufAligned, + io_buf_ext::{FullSlice, IoBufExt}, +}; -use super::io_buf_ext::{FullSlice, IoBufExt}; + +pub(crate) use flush::FlushControl; + +pub(crate) trait CheapCloneForRead { + /// Returns a cheap clone of the buffer. + fn cheap_clone(&self) -> Self; +} + +impl CheapCloneForRead for IoBuffer { + fn cheap_clone(&self) -> Self { + // Cheap clone over an `Arc`. + self.clone() + } +} /// A trait for doing owned-buffer write IO. /// Think [`tokio::io::AsyncWrite`] but with owned buffers. +/// The owned buffers need to be aligned due to Direct IO requirements. pub trait OwnedAsyncWriter { - async fn write_all<Buf: IoBuf + Send>( - &mut self, + fn write_all_at<Buf: IoBufAligned + Send>( + &self, buf: FullSlice<Buf>, + offset: u64, ctx: &RequestContext, - ) -> std::io::Result<(usize, FullSlice<Buf>)>; + ) -> impl std::future::Future<Output = std::io::Result<FullSlice<Buf>>> + Send; } /// A wrapper around an [`OwnedAsyncWriter`] that uses a [`Buffer`] to batch /// small writes into larger writes of size [`Buffer::cap`]. -/// -/// # Passthrough Of Large Writes -/// -/// Calls to [`BufferedWriter::write_buffered`] that are larger than [`Buffer::cap`] -/// cause the internal buffer to be flushed prematurely so that the large -/// buffered write is passed through to the underlying [`OwnedAsyncWriter`]. -/// -/// This pass-through is generally beneficial for throughput, but if -/// the storage backend of the [`OwnedAsyncWriter`] is a shared resource, -/// unlimited large writes may cause latency or fairness issues. -/// -/// In such cases, a different implementation that always buffers in memory -/// may be preferable. -pub struct BufferedWriter<B, W> { - writer: W, +// TODO(yuchen): For large write, implementing buffer bypass for aligned parts of the write could be beneficial to throughput, +// since we would avoid copying the majority of the data into the internal buffer. +pub struct BufferedWriter<B: Buffer, W> { + writer: Arc<W>, /// invariant: always remains Some(buf) except /// - while IO is ongoing => goes back to Some() once the IO completed successfully /// - after an IO error => stays `None` forever /// /// In these exceptional cases, it's `None`. - buf: Option<B>, + mutable: Option<B>, + /// A handle to the background flush task for writing data to disk. + flush_handle: FlushHandle<B::IoBuf, W>, + /// The number of bytes submitted to the background task.
+ bytes_submitted: u64, } impl BufferedWriter where - B: Buffer + Send, - Buf: IoBuf + Send, - W: OwnedAsyncWriter, + B: Buffer + Send + 'static, + Buf: IoBufAligned + Send + Sync + CheapCloneForRead, + W: OwnedAsyncWriter + Send + Sync + 'static + std::fmt::Debug, { - pub fn new(writer: W, buf: B) -> Self { + /// Creates a new buffered writer. + /// + /// The `buf_new` function provides a way to initialize the owned buffers used by this writer. + pub fn new( + writer: Arc, + buf_new: impl Fn() -> B, + gate_guard: utils::sync::gate::GateGuard, + ctx: &RequestContext, + ) -> Self { Self { - writer, - buf: Some(buf), + writer: writer.clone(), + mutable: Some(buf_new()), + flush_handle: FlushHandle::spawn_new( + writer, + buf_new(), + gate_guard, + ctx.attached_child(), + ), + bytes_submitted: 0, } } @@ -57,87 +90,71 @@ where &self.writer } + /// Returns the number of bytes submitted to the background flush task. + pub fn bytes_submitted(&self) -> u64 { + self.bytes_submitted + } + /// Panics if used after any of the write paths returned an error - pub fn inspect_buffer(&self) -> &B { - self.buf() + pub fn inspect_mutable(&self) -> &B { + self.mutable() + } + + /// Gets a reference to the maybe flushed read-only buffer. + /// Returns `None` if the writer has not submitted any flush request. + pub fn inspect_maybe_flushed(&self) -> Option<&FullSlice> { + self.flush_handle.maybe_flushed.as_ref() } #[cfg_attr(target_os = "macos", allow(dead_code))] - pub async fn flush_and_into_inner(mut self, ctx: &RequestContext) -> std::io::Result { + pub async fn flush_and_into_inner( + mut self, + ctx: &RequestContext, + ) -> std::io::Result<(u64, Arc)> { self.flush(ctx).await?; - let Self { buf, writer } = self; + let Self { + mutable: buf, + writer, + mut flush_handle, + bytes_submitted: bytes_amount, + } = self; + flush_handle.shutdown().await?; assert!(buf.is_some()); - Ok(writer) + Ok((bytes_amount, writer)) } + /// Gets a reference to the mutable in-memory buffer. #[inline(always)] - fn buf(&self) -> &B { - self.buf + fn mutable(&self) -> &B { + self.mutable .as_ref() .expect("must not use after we returned an error") } - /// Guarantees that if Ok() is returned, all bytes in `chunk` have been accepted. 
#[cfg_attr(target_os = "macos", allow(dead_code))] - pub async fn write_buffered( + pub async fn write_buffered_borrowed( &mut self, - chunk: FullSlice, + chunk: &[u8], ctx: &RequestContext, - ) -> std::io::Result<(usize, FullSlice)> { - let chunk = chunk.into_raw_slice(); - - let chunk_len = chunk.len(); - // avoid memcpy for the middle of the chunk - if chunk.len() >= self.buf().cap() { - self.flush(ctx).await?; - // do a big write, bypassing `buf` - assert_eq!( - self.buf - .as_ref() - .expect("must not use after an error") - .pending(), - 0 - ); - let (nwritten, chunk) = self - .writer - .write_all(FullSlice::must_new(chunk), ctx) - .await?; - assert_eq!(nwritten, chunk_len); - return Ok((nwritten, chunk)); - } - // in-memory copy the < BUFFER_SIZED tail of the chunk - assert!(chunk.len() < self.buf().cap()); - let mut slice = &chunk[..]; - while !slice.is_empty() { - let buf = self.buf.as_mut().expect("must not use after an error"); - let need = buf.cap() - buf.pending(); - let have = slice.len(); - let n = std::cmp::min(need, have); - buf.extend_from_slice(&slice[..n]); - slice = &slice[n..]; - if buf.pending() >= buf.cap() { - assert_eq!(buf.pending(), buf.cap()); - self.flush(ctx).await?; - } + ) -> std::io::Result { + let (len, control) = self.write_buffered_borrowed_controlled(chunk, ctx).await?; + if let Some(control) = control { + control.release().await; } - assert!(slice.is_empty(), "by now we should have drained the chunk"); - Ok((chunk_len, FullSlice::must_new(chunk))) + Ok(len) } - /// Strictly less performant variant of [`Self::write_buffered`] that allows writing borrowed data. - /// - /// It is less performant because we always have to copy the borrowed data into the internal buffer - /// before we can do the IO. The [`Self::write_buffered`] can avoid this, which is more performant - /// for large writes. - pub async fn write_buffered_borrowed( + /// In addition to bytes submitted in this write, also returns a handle that can control the flush behavior. 
+ pub(crate) async fn write_buffered_borrowed_controlled( &mut self, mut chunk: &[u8], ctx: &RequestContext, - ) -> std::io::Result<usize> { + ) -> std::io::Result<(usize, Option<FlushControl>)> { let chunk_len = chunk.len(); + let mut control: Option<FlushControl> = None; while !chunk.is_empty() { - let buf = self.buf.as_mut().expect("must not use after an error"); + let buf = self.mutable.as_mut().expect("must not use after an error"); let need = buf.cap() - buf.pending(); let have = chunk.len(); let n = std::cmp::min(need, have); @@ -145,26 +162,27 @@ where chunk = &chunk[n..]; if buf.pending() >= buf.cap() { assert_eq!(buf.pending(), buf.cap()); - self.flush(ctx).await?; + if let Some(control) = control.take() { + control.release().await; + } + control = self.flush(ctx).await?; } } - Ok(chunk_len) + Ok((chunk_len, control)) } - async fn flush(&mut self, ctx: &RequestContext) -> std::io::Result<()> { - let buf = self.buf.take().expect("must not use after an error"); + #[must_use = "caller must explicitly check the flush control"] + async fn flush(&mut self, _ctx: &RequestContext) -> std::io::Result<Option<FlushControl>> { + let buf = self.mutable.take().expect("must not use after an error"); let buf_len = buf.pending(); if buf_len == 0 { - self.buf = Some(buf); - return Ok(()); + self.mutable = Some(buf); + return Ok(None); } - let slice = buf.flush(); - let (nwritten, slice) = self.writer.write_all(slice, ctx).await?; - assert_eq!(nwritten, buf_len); - self.buf = Some(Buffer::reuse_after_flush( - slice.into_raw_slice().into_inner(), - )); - Ok(()) + let (recycled, flush_control) = self.flush_handle.flush(buf, self.bytes_submitted).await?; + self.bytes_submitted += u64::try_from(buf_len).unwrap(); + self.mutable = Some(recycled); + Ok(Some(flush_control)) } } @@ -192,64 +210,77 @@ pub trait Buffer { fn reuse_after_flush(iobuf: Self::IoBuf) -> Self; } -impl Buffer for BytesMut { - type IoBuf = BytesMut; +impl Buffer for IoBufferMut { + type IoBuf = IoBuffer; - #[inline(always)] fn cap(&self) -> usize { self.capacity() } fn extend_from_slice(&mut self, other: &[u8]) { - BytesMut::extend_from_slice(self, other) + if self.len() + other.len() > self.cap() { + panic!("Buffer capacity exceeded"); + } + + IoBufferMut::extend_from_slice(self, other); } - #[inline(always)] fn pending(&self) -> usize { self.len() } - fn flush(self) -> FullSlice<BytesMut> { - self.slice_len() - } - - fn reuse_after_flush(mut iobuf: BytesMut) -> Self { - iobuf.clear(); - iobuf + fn flush(self) -> FullSlice<IoBuffer> { + self.freeze().slice_len() } -} -impl OwnedAsyncWriter for Vec<u8> { - async fn write_all<Buf: IoBuf + Send>( - &mut self, - buf: FullSlice<Buf>, - _: &RequestContext, - ) -> std::io::Result<(usize, FullSlice<Buf>)> { - self.extend_from_slice(&buf[..]); - Ok((buf.len(), buf)) + /// Caller should make sure that `iobuf` only has one strong reference before invoking this method. + fn reuse_after_flush(iobuf: Self::IoBuf) -> Self { + let mut recycled = iobuf + .into_mut() + .expect("buffer should only have one strong reference"); + recycled.clear(); + recycled } } #[cfg(test)] mod tests { - use bytes::BytesMut; + use std::sync::Mutex; use super::*; use crate::context::{DownloadBehavior, RequestContext}; use crate::task_mgr::TaskKind; - #[derive(Default)] + #[derive(Default, Debug)] struct RecorderWriter { - writes: Vec<Vec<u8>>, + /// Recorded bytes and write offsets. + writes: Mutex<Vec<(Vec<u8>, u64)>>, + } + + impl RecorderWriter { + /// Gets recorded bytes and write offsets.
+    fn get_writes(&self) -> Vec<Vec<u8>> {
+        self.writes
+            .lock()
+            .unwrap()
+            .iter()
+            .map(|(buf, _)| buf.clone())
+            .collect()
+    }
 }
+
 impl OwnedAsyncWriter for RecorderWriter {
-    async fn write_all<Buf: IoBuf + Send>(
-        &mut self,
+    async fn write_all_at<Buf: IoBufAligned + Send>(
+        &self,
         buf: FullSlice<Buf>,
+        offset: u64,
         _: &RequestContext,
-    ) -> std::io::Result<(usize, FullSlice<Buf>)> {
-        self.writes.push(Vec::from(&buf[..]));
-        Ok((buf.len(), buf))
+    ) -> std::io::Result<FullSlice<Buf>> {
+        self.writes
+            .lock()
+            .unwrap()
+            .push((Vec::from(&buf[..]), offset));
+        Ok(buf)
+    }
 }

@@ -257,71 +288,21 @@ mod tests {
         RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error)
     }

-    macro_rules! write {
-        ($writer:ident, $data:literal) => {{
-            $writer
-                .write_buffered(::bytes::Bytes::from_static($data).slice_len(), &test_ctx())
-                .await?;
-        }};
-    }
-
-    #[tokio::test]
-    async fn test_buffered_writes_only() -> std::io::Result<()> {
-        let recorder = RecorderWriter::default();
-        let mut writer = BufferedWriter::new(recorder, BytesMut::with_capacity(2));
-        write!(writer, b"a");
-        write!(writer, b"b");
-        write!(writer, b"c");
-        write!(writer, b"d");
-        write!(writer, b"e");
-        let recorder = writer.flush_and_into_inner(&test_ctx()).await?;
-        assert_eq!(
-            recorder.writes,
-            vec![Vec::from(b"ab"), Vec::from(b"cd"), Vec::from(b"e")]
-        );
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn test_passthrough_writes_only() -> std::io::Result<()> {
-        let recorder = RecorderWriter::default();
-        let mut writer = BufferedWriter::new(recorder, BytesMut::with_capacity(2));
-        write!(writer, b"abc");
-        write!(writer, b"de");
-        write!(writer, b"");
-        write!(writer, b"fghijk");
-        let recorder = writer.flush_and_into_inner(&test_ctx()).await?;
-        assert_eq!(
-            recorder.writes,
-            vec![Vec::from(b"abc"), Vec::from(b"de"), Vec::from(b"fghijk")]
-        );
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn test_passthrough_write_with_nonempty_buffer() -> std::io::Result<()> {
-        let recorder = RecorderWriter::default();
-        let mut writer = BufferedWriter::new(recorder, BytesMut::with_capacity(2));
-        write!(writer, b"a");
-        write!(writer, b"bc");
-        write!(writer, b"d");
-        write!(writer, b"e");
-        let recorder = writer.flush_and_into_inner(&test_ctx()).await?;
-        assert_eq!(
-            recorder.writes,
-            vec![Vec::from(b"a"), Vec::from(b"bc"), Vec::from(b"de")]
-        );
-        Ok(())
-    }
-
     #[tokio::test]
-    async fn test_write_all_borrowed_always_goes_through_buffer() -> std::io::Result<()> {
+    async fn test_write_all_borrowed_always_goes_through_buffer() -> anyhow::Result<()> {
         let ctx = test_ctx();
         let ctx = &ctx;
-        let recorder = RecorderWriter::default();
-        let mut writer = BufferedWriter::new(recorder, BytesMut::with_capacity(2));
+        let recorder = Arc::new(RecorderWriter::default());
+        let gate = utils::sync::gate::Gate::default();
+        let mut writer = BufferedWriter::<_, RecorderWriter>::new(
+            recorder,
+            || IoBufferMut::with_capacity(2),
+            gate.enter()?,
+            ctx,
+        );

         writer.write_buffered_borrowed(b"abc", ctx).await?;
+        writer.write_buffered_borrowed(b"", ctx).await?;
         writer.write_buffered_borrowed(b"d", ctx).await?;
         writer.write_buffered_borrowed(b"e", ctx).await?;
         writer.write_buffered_borrowed(b"fg", ctx).await?;
@@ -329,9 +310,9 @@ mod tests {
         writer.write_buffered_borrowed(b"j", ctx).await?;
         writer.write_buffered_borrowed(b"klmno", ctx).await?;

-        let recorder = writer.flush_and_into_inner(ctx).await?;
+        let (_, recorder) = writer.flush_and_into_inner(ctx).await?;
         assert_eq!(
-            recorder.writes,
+            recorder.get_writes(),
             {
                 let expect: &[&[u8]] = &[b"ab", b"cd", b"ef", b"gh", b"ij", b"kl", b"mn", b"o"];
                 expect
diff --git a/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs b/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs
new file mode 100644
index 000000000000..9ce8b311bb5c
--- /dev/null
+++ b/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs
@@ -0,0 +1,314 @@
+use std::sync::Arc;
+
+use utils::sync::duplex;
+
+use crate::{
+    context::RequestContext,
+    virtual_file::owned_buffers_io::{io_buf_aligned::IoBufAligned, io_buf_ext::FullSlice},
+};
+
+use super::{Buffer, CheapCloneForRead, OwnedAsyncWriter};
+
+/// A handle to the flush task.
+pub struct FlushHandle<Buf, W> {
+    inner: Option<FlushHandleInner<Buf, W>>,
+    /// Immutable buffer for serving tail reads.
+    /// `None` if no flush request has been submitted.
+    pub(super) maybe_flushed: Option<FullSlice<Buf>>,
+}
+
+pub struct FlushHandleInner<Buf, W> {
+    /// A bi-directional channel that sends (buffer, offset) for writes,
+    /// and receives recycled buffers.
+    channel: duplex::mpsc::Duplex<FlushRequest<Buf>, FullSlice<Buf>>,
+    /// Join handle for the background flush task.
+    join_handle: tokio::task::JoinHandle<std::io::Result<Arc<W>>>,
+}
+
+struct FlushRequest<Buf> {
+    slice: FullSlice<Buf>,
+    offset: u64,
+    #[cfg(test)]
+    ready_to_flush_rx: tokio::sync::oneshot::Receiver<()>,
+    #[cfg(test)]
+    done_flush_tx: tokio::sync::oneshot::Sender<()>,
+}
+
+/// Constructs a request and a control object for a new flush operation.
+#[cfg(not(test))]
+fn new_flush_op<Buf>(slice: FullSlice<Buf>, offset: u64) -> (FlushRequest<Buf>, FlushControl) {
+    let request = FlushRequest { slice, offset };
+    let control = FlushControl::untracked();
+
+    (request, control)
+}
+
+/// Constructs a request and a control object for a new flush operation.
+#[cfg(test)]
+fn new_flush_op<Buf>(slice: FullSlice<Buf>, offset: u64) -> (FlushRequest<Buf>, FlushControl) {
+    let (ready_to_flush_tx, ready_to_flush_rx) = tokio::sync::oneshot::channel();
+    let (done_flush_tx, done_flush_rx) = tokio::sync::oneshot::channel();
+    let control = FlushControl::not_started(ready_to_flush_tx, done_flush_rx);
+
+    let request = FlushRequest {
+        slice,
+        offset,
+        ready_to_flush_rx,
+        done_flush_tx,
+    };
+    (request, control)
+}
+
+/// A handle to a `FlushRequest` that allows unit tests precise control over flush behavior.
+#[cfg(test)]
+pub(crate) struct FlushControl {
+    not_started: FlushNotStarted,
+}
+
+#[cfg(not(test))]
+pub(crate) struct FlushControl;
+
+impl FlushControl {
+    #[cfg(test)]
+    fn not_started(
+        ready_to_flush_tx: tokio::sync::oneshot::Sender<()>,
+        done_flush_rx: tokio::sync::oneshot::Receiver<()>,
+    ) -> Self {
+        FlushControl {
+            not_started: FlushNotStarted {
+                ready_to_flush_tx,
+                done_flush_rx,
+            },
+        }
+    }
+
+    #[cfg(not(test))]
+    fn untracked() -> Self {
+        FlushControl
+    }
+
+    /// In tests, turn flush control into a not started state.
+    #[cfg(test)]
+    pub(crate) fn into_not_started(self) -> FlushNotStarted {
+        self.not_started
+    }
+
+    /// Release control of the submitted buffer.
+    ///
+    /// In a `cfg(test)` environment, the buffer is guaranteed to be flushed to disk after
+    /// [`FlushControl::release`] finishes execution.
+    pub async fn release(self) {
+        #[cfg(test)]
+        {
+            self.not_started
+                .ready_to_flush()
+                .wait_until_flush_is_done()
+                .await;
+        }
+    }
+}
+
+impl<Buf, W> FlushHandle<Buf, W>
+where
+    Buf: IoBufAligned + Send + Sync + CheapCloneForRead,
+    W: OwnedAsyncWriter + Send + Sync + 'static + std::fmt::Debug,
+{
+    /// Spawns a new background flush task and obtains a handle.
+    ///
+    /// Note: The duplex channel to the background task acts as the queue, so we do not need to
+    /// explicitly maintain a queue of buffers.
+    pub fn spawn_new<B>(
+        file: Arc<W>,
+        buf: B,
+        gate_guard: utils::sync::gate::GateGuard,
+        ctx: RequestContext,
+    ) -> Self
+    where
+        B: Buffer<IoBuf = Buf> + Send + 'static,
+    {
+        // It is fine to buffer up to only 1 message. We only have 1 message in flight at a time.
+        let (front, back) = duplex::mpsc::channel(1);
+
+        let join_handle = tokio::spawn(async move {
+            FlushBackgroundTask::new(back, file, gate_guard, ctx)
+                .run(buf.flush())
+                .await
+        });
+
+        FlushHandle {
+            inner: Some(FlushHandleInner {
+                channel: front,
+                join_handle,
+            }),
+            maybe_flushed: None,
+        }
+    }
+
+    /// Submits a buffer to be flushed in the background task.
+    /// Returns a buffer that completed flushing for re-use, length reset to 0, capacity unchanged.
+    /// The submitted buffer is also saved in `Self::maybe_flushed`, so that tail reads can be
+    /// served while the flush is in flight.
+    pub async fn flush<B>(&mut self, buf: B, offset: u64) -> std::io::Result<(B, FlushControl)>
+    where
+        B: Buffer<IoBuf = Buf> + Send + 'static,
+    {
+        let slice = buf.flush();
+
+        // Saves a buffer for read while flushing. This also removes the reference to the old buffer.
+        self.maybe_flushed = Some(slice.cheap_clone());
+
+        let (request, flush_control) = new_flush_op(slice, offset);
+
+        // Submits the buffer to the background task.
+        let submit = self.inner_mut().channel.send(request).await;
+        if submit.is_err() {
+            return self.handle_error().await;
+        }
+
+        // Wait for an available buffer from the background flush task.
+        // This is the BACKPRESSURE mechanism: if the flush task can't keep up,
+        // then the write path will eventually wait for it here.
+        let Some(recycled) = self.inner_mut().channel.recv().await else {
+            return self.handle_error().await;
+        };
+
+        // The only other place that could hold a reference to the recycled buffer
+        // is in `Self::maybe_flushed`, but we have already replaced it with the new buffer.
+        let recycled = Buffer::reuse_after_flush(recycled.into_raw_slice().into_inner());
+        Ok((recycled, flush_control))
+    }
+
+    async fn handle_error<T>(&mut self) -> std::io::Result<T> {
+        Err(self
+            .shutdown()
+            .await
+            .expect_err("flush task only disconnects duplex if it exits with an error"))
+    }
+
+    /// Cleans up the channel and joins the flush task.
+    pub async fn shutdown(&mut self) -> std::io::Result<Arc<W>> {
+        let handle = self
+            .inner
+            .take()
+            .expect("must not use after we returned an error");
+        drop(handle.channel.tx);
+        handle.join_handle.await.unwrap()
+    }
+
+    /// Gets a mutable reference to the inner handle. Panics if [`Self::inner`] is `None`.
+    /// This only happens if the handle is used after an error.
+    fn inner_mut(&mut self) -> &mut FlushHandleInner<Buf, W> {
+        self.inner
+            .as_mut()
+            .expect("must not use after we returned an error")
+    }
+}
+
+/// A background task for flushing data to disk.
+pub struct FlushBackgroundTask<Buf, W> {
+    /// A bi-directional channel that receives (buffer, offset) for writes,
+    /// and sends back recycled buffers.
+    channel: duplex::mpsc::Duplex<FullSlice<Buf>, FlushRequest<Buf>>,
+    /// A writer for persisting data to disk.
+    writer: Arc<W>,
+    ctx: RequestContext,
+    /// Prevents the timeline from shutting down until the flush background task finishes
+    /// flushing all remaining buffers to disk.
+    _gate_guard: utils::sync::gate::GateGuard,
+}
+
+impl<Buf, W> FlushBackgroundTask<Buf, W>
+where
+    Buf: IoBufAligned + Send + Sync,
+    W: OwnedAsyncWriter + Sync + 'static,
+{
+    /// Creates a new background flush task.
+    fn new(
+        channel: duplex::mpsc::Duplex<FullSlice<Buf>, FlushRequest<Buf>>,
+        file: Arc<W>,
+        gate_guard: utils::sync::gate::GateGuard,
+        ctx: RequestContext,
+    ) -> Self {
+        FlushBackgroundTask {
+            channel,
+            writer: file,
+            _gate_guard: gate_guard,
+            ctx,
+        }
+    }
+
+    /// Runs the background flush task.
+    /// The passed-in slice is immediately sent back to the flush handle through the duplex channel.
+    async fn run(mut self, slice: FullSlice<Buf>) -> std::io::Result<Arc<W>> {
+        // Sends the extra buffer back to the handle.
+        self.channel.send(slice).await.map_err(|_| {
+            std::io::Error::new(std::io::ErrorKind::BrokenPipe, "flush handle closed early")
+        })?;
+
+        // Exit condition: channel is closed and there is no remaining buffer to be flushed
+        while let Some(request) = self.channel.recv().await {
+            #[cfg(test)]
+            {
+                // In test, wait for control to signal that we are ready to flush.
+                if request.ready_to_flush_rx.await.is_err() {
+                    tracing::debug!("control dropped");
+                }
+            }
+
+            // Write slice to disk at `offset`.
+            let slice = self
+                .writer
+                .write_all_at(request.slice, request.offset, &self.ctx)
+                .await?;
+
+            #[cfg(test)]
+            {
+                // In test, tell control we are done flushing the buffer.
+                if request.done_flush_tx.send(()).is_err() {
+                    tracing::debug!("control dropped");
+                }
+            }
+
+            // Sends the buffer back to the handle for reuse. The handle is in charge of cleaning the buffer.
+            if self.channel.send(slice).await.is_err() {
+                // Although the channel is closed, we still need to finish flushing the remaining buffers.
+                continue;
+            }
+        }
+
+        Ok(self.writer)
+    }
+}
+
+#[cfg(test)]
+pub(crate) struct FlushNotStarted {
+    ready_to_flush_tx: tokio::sync::oneshot::Sender<()>,
+    done_flush_rx: tokio::sync::oneshot::Receiver<()>,
+}
+
+#[cfg(test)]
+pub(crate) struct FlushInProgress {
+    done_flush_rx: tokio::sync::oneshot::Receiver<()>,
+}
+
+#[cfg(test)]
+pub(crate) struct FlushDone;
+
+#[cfg(test)]
+impl FlushNotStarted {
+    /// Signals the background task that the buffer is ready to flush to disk.
+    pub fn ready_to_flush(self) -> FlushInProgress {
+        self.ready_to_flush_tx
+            .send(())
+            .map(|_| FlushInProgress {
+                done_flush_rx: self.done_flush_rx,
+            })
+            .unwrap()
+    }
+}
+
+#[cfg(test)]
+impl FlushInProgress {
+    /// Waits until background flush is done.
+    pub async fn wait_until_flush_is_done(self) -> FlushDone {
+        self.done_flush_rx.await.unwrap();
+        FlushDone
+    }
+}
diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs
index d568da596ab7..30c8965d517d 100644
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -582,18 +582,21 @@ impl WalIngest {
             forknum: FSM_FORKNUM,
         };

+        // Zero out the last remaining FSM page, if this shard owns it. We are not precise here,
+        // and instead of digging in the FSM bitmap format we just clear the whole page.
         let fsm_logical_page_no = blkno / pg_constants::SLOTS_PER_FSM_PAGE;
         let mut fsm_physical_page_no = fsm_logical_to_physical(fsm_logical_page_no);
-        if blkno % pg_constants::SLOTS_PER_FSM_PAGE != 0 {
-            // Tail of last remaining FSM page has to be zeroed.
-            // We are not precise here and instead of digging in FSM bitmap format just clear the whole page.
+        if blkno % pg_constants::SLOTS_PER_FSM_PAGE != 0
+            && self
+                .shard
+                .is_key_local(&rel_block_to_key(rel, fsm_physical_page_no))
+        {
             modification.put_rel_page_image_zero(rel, fsm_physical_page_no)?;
             fsm_physical_page_no += 1;
         }
-        // TODO: re-examine the None case here wrt. sharding; should we error?
+        // Truncate this shard's view of the FSM relation size, if it even has one.
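[Illustration for the `flush.rs` module above, before the walingest hunk continues.] The `FlushHandle` / `FlushBackgroundTask` pair implements double buffering with backpressure over a depth-1 duplex channel. A minimal self-contained sketch of that pattern: two bounded tokio mpsc channels stand in for `utils::sync::duplex`, a `Vec<u8>` stands in for the aligned IO buffer, and appending to an in-memory `Vec<u8>` models `write_all_at`. All names and the tokio harness here are illustrative, not part of the PR.

```rust
use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    // Depth 1 in each direction: at most one dirty buffer in flight.
    let (dirty_tx, mut dirty_rx) = mpsc::channel::<(Vec<u8>, u64)>(1);
    let (clean_tx, mut clean_rx) = mpsc::channel::<Vec<u8>>(1);

    // Background "flush task": receives (buffer, offset), writes it out,
    // then recycles the buffer back to the write path.
    let flusher = tokio::spawn(async move {
        let mut file = Vec::new(); // stands in for the real writer
        while let Some((buf, offset)) = dirty_rx.recv().await {
            assert_eq!(offset as usize, file.len());
            file.extend_from_slice(&buf);
            // If the handle is gone, keep draining the queue; just stop recycling.
            if clean_tx.send(buf).await.is_err() {
                continue;
            }
        }
        file
    });

    let mut buf = Vec::with_capacity(2);
    let mut offset = 0u64;
    for chunk in [b"ab".as_slice(), b"cd".as_slice(), b"e".as_slice()] {
        buf.extend_from_slice(chunk);
        dirty_tx.send((std::mem::take(&mut buf), offset)).await.unwrap();
        offset += chunk.len() as u64;
        // BACKPRESSURE: wait for a recycled buffer before accepting more writes.
        buf = clean_rx.recv().await.unwrap();
        buf.clear();
    }
    drop(dirty_tx); // close the channel so the flusher exits
    assert_eq!(flusher.await.unwrap(), b"abcde");
}
```

The depth-1 channel is what bounds memory here: the write path can fill at most one buffer while another is being flushed.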
let nblocks = get_relsize(modification, rel, ctx).await?.unwrap_or(0); if nblocks > fsm_physical_page_no { - // check if something to do: FSM is larger than truncate position self.put_rel_truncation(modification, rel, fsm_physical_page_no, ctx) .await?; } @@ -617,7 +620,7 @@ impl WalIngest { // tail bits in the last remaining map page, representing truncated heap // blocks, need to be cleared. This is not only tidy, but also necessary // because we don't get a chance to clear the bits if the heap is extended - // again. + // again. Only do this on the shard that owns the page. if (trunc_byte != 0 || trunc_offs != 0) && self.shard.is_key_local(&rel_block_to_key(rel, vm_page_no)) { @@ -631,10 +634,9 @@ impl WalIngest { )?; vm_page_no += 1; } - // TODO: re-examine the None case here wrt. sharding; should we error? + // Truncate this shard's view of the VM relation size, if it even has one. let nblocks = get_relsize(modification, rel, ctx).await?.unwrap_or(0); if nblocks > vm_page_no { - // check if something to do: VM is larger than truncate position self.put_rel_truncation(modification, rel, vm_page_no, ctx) .await?; } @@ -1392,6 +1394,10 @@ impl WalIngest { img: Bytes, ctx: &RequestContext, ) -> Result<()> { + if !self.shard.is_shard_zero() { + return Ok(()); + } + self.handle_slru_extend(modification, kind, segno, blknum, ctx) .await?; modification.put_slru_page_image(kind, segno, blknum, img)?; diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index a5e0c402fbb7..880c0de64e61 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -610,6 +610,9 @@ prefetch_read(PrefetchRequest *slot) { NeonResponse *response; MemoryContext old; + BufferTag buftag; + shardno_t shard_no; + uint64 my_ring_index; Assert(slot->status == PRFS_REQUESTED); Assert(slot->response == NULL); @@ -623,11 +626,29 @@ prefetch_read(PrefetchRequest *slot) slot->status, slot->response, (long)slot->my_ring_index, (long)MyPState->ring_receive); + /* + * Copy the request info so that if an error happens and the prefetch + * queue is flushed during the receive call, we can print the original + * values in the error message + */ + buftag = slot->buftag; + shard_no = slot->shard_no; + my_ring_index = slot->my_ring_index; + old = MemoryContextSwitchTo(MyPState->errctx); - response = (NeonResponse *) page_server->receive(slot->shard_no); + response = (NeonResponse *) page_server->receive(shard_no); MemoryContextSwitchTo(old); if (response) { + /* The slot should still be valid */ + if (slot->status != PRFS_REQUESTED || + slot->response != NULL || + slot->my_ring_index != MyPState->ring_receive) + neon_shard_log(shard_no, ERROR, + "Incorrect prefetch slot state after receive: status=%d response=%p my=%lu receive=%lu", + slot->status, slot->response, + (long) slot->my_ring_index, (long) MyPState->ring_receive); + /* update prefetch state */ MyPState->n_responses_buffered += 1; MyPState->n_requests_inflight -= 1; @@ -642,11 +663,15 @@ prefetch_read(PrefetchRequest *slot) } else { - neon_shard_log(slot->shard_no, LOG, + /* + * Note: The slot might no longer be valid, if the connection was lost + * and the prefetch queue was flushed during the receive call + */ + neon_shard_log(shard_no, LOG, "No response from reading prefetch entry %lu: %u/%u/%u.%u block %u. 
This can be caused by a concurrent disconnect", - (long)slot->my_ring_index, - RelFileInfoFmt(BufTagGetNRelFileInfo(slot->buftag)), - slot->buftag.forkNum, slot->buftag.blockNum); + (long) my_ring_index, + RelFileInfoFmt(BufTagGetNRelFileInfo(buftag)), + buftag.forkNum, buftag.blockNum); return false; } } diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index f5934c8a89dd..2f63ee3acc42 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -6,7 +6,7 @@ license.workspace = true [features] default = [] -testing = [] +testing = ["dep:tokio-postgres"] [dependencies] ahash.workspace = true @@ -55,6 +55,7 @@ parquet.workspace = true parquet_derive.workspace = true pin-project-lite.workspace = true postgres_backend.workspace = true +postgres-client = { package = "tokio-postgres2", path = "../libs/proxy/tokio-postgres2" } postgres-protocol = { package = "postgres-protocol2", path = "../libs/proxy/postgres-protocol2" } pq_proto.workspace = true prometheus.workspace = true @@ -81,7 +82,7 @@ subtle.workspace = true thiserror.workspace = true tikv-jemallocator.workspace = true tikv-jemalloc-ctl = { workspace = true, features = ["use_std"] } -tokio-postgres = { package = "tokio-postgres2", path = "../libs/proxy/tokio-postgres2" } +tokio-postgres = { workspace = true, optional = true } tokio-rustls.workspace = true tokio-util.workspace = true tokio = { workspace = true, features = ["signal"] } @@ -119,3 +120,4 @@ rcgen.workspace = true rstest.workspace = true walkdir.workspace = true rand_distr = "0.4" +tokio-postgres.workspace = true diff --git a/proxy/src/auth/backend/classic.rs b/proxy/src/auth/backend/classic.rs index 491b272ac4e8..5e494dfdd694 100644 --- a/proxy/src/auth/backend/classic.rs +++ b/proxy/src/auth/backend/classic.rs @@ -66,7 +66,7 @@ pub(super) async fn authenticate( Ok(ComputeCredentials { info: creds, - keys: ComputeCredentialKeys::AuthKeys(tokio_postgres::config::AuthKeys::ScramSha256( + keys: ComputeCredentialKeys::AuthKeys(postgres_client::config::AuthKeys::ScramSha256( scram_keys, )), }) diff --git a/proxy/src/auth/backend/console_redirect.rs b/proxy/src/auth/backend/console_redirect.rs index bf7a1cb0705f..575d60be8559 100644 --- a/proxy/src/auth/backend/console_redirect.rs +++ b/proxy/src/auth/backend/console_redirect.rs @@ -1,8 +1,8 @@ use async_trait::async_trait; +use postgres_client::config::SslMode; use pq_proto::BeMessage as Be; use thiserror::Error; use tokio::io::{AsyncRead, AsyncWrite}; -use tokio_postgres::config::SslMode; use tracing::{info, info_span}; use super::ComputeCredentialKeys; @@ -49,13 +49,19 @@ impl ReportableError for ConsoleRedirectError { } } -fn hello_message(redirect_uri: &reqwest::Url, session_id: &str) -> String { +fn hello_message( + redirect_uri: &reqwest::Url, + session_id: &str, + duration: std::time::Duration, +) -> String { + let formatted_duration = humantime::format_duration(duration).to_string(); format!( concat![ "Welcome to Neon!\n", - "Authenticate by visiting:\n", + "Authenticate by visiting (will expire in {duration}):\n", " {redirect_uri}{session_id}\n\n", ], + duration = formatted_duration, redirect_uri = redirect_uri, session_id = session_id, ) @@ -118,7 +124,11 @@ async fn authenticate( }; let span = info_span!("console_redirect", psql_session_id = &psql_session_id); - let greeting = hello_message(link_uri, &psql_session_id); + let greeting = hello_message( + link_uri, + &psql_session_id, + auth_config.console_redirect_confirmation_timeout, + ); // Give user a URL to spawn a new database. 
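[Aside on the `hello_message` change above.] `humantime::format_duration`, as used there, renders compound units, which is what makes the expiry hint readable. A small sketch; the timeout value below is made up, the real one comes from `auth_config.console_redirect_confirmation_timeout`:

```rust
use std::time::Duration;

fn main() {
    let timeout = Duration::from_secs(95); // hypothetical confirmation timeout
    let formatted = humantime::format_duration(timeout).to_string();
    assert_eq!(formatted, "1m 35s");
    println!("Authenticate by visiting (will expire in {formatted}):");
}
```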
info!(parent: &span, "sending the auth URL to the user"); @@ -151,12 +161,8 @@ async fn authenticate( // This config should be self-contained, because we won't // take username or dbname from client's startup message. - let mut config = compute::ConnCfg::new(); - config - .host(&db_info.host) - .port(db_info.port) - .dbname(&db_info.dbname) - .user(&db_info.user); + let mut config = compute::ConnCfg::new(db_info.host.to_string(), db_info.port); + config.dbname(&db_info.dbname).user(&db_info.user); ctx.set_dbname(db_info.dbname.into()); ctx.set_user(db_info.user.into()); diff --git a/proxy/src/auth/backend/jwt.rs b/proxy/src/auth/backend/jwt.rs index 517d4fd34bb8..a258090b1582 100644 --- a/proxy/src/auth/backend/jwt.rs +++ b/proxy/src/auth/backend/jwt.rs @@ -350,6 +350,13 @@ impl JwkCacheEntryLock { let header = base64::decode_config(header, base64::URL_SAFE_NO_PAD)?; let header = serde_json::from_slice::>(&header)?; + let payloadb = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)?; + let payload = serde_json::from_slice::>(&payloadb)?; + + if let Some(iss) = &payload.issuer { + ctx.set_jwt_issuer(iss.as_ref().to_owned()); + } + let sig = base64::decode_config(signature, base64::URL_SAFE_NO_PAD)?; let kid = header.key_id.ok_or(JwtError::MissingKeyId)?; @@ -388,9 +395,6 @@ impl JwkCacheEntryLock { key => return Err(JwtError::UnsupportedKeyType(key.into())), }; - let payloadb = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)?; - let payload = serde_json::from_slice::>(&payloadb)?; - tracing::debug!(?payload, "JWT signature valid with claims"); if let Some(aud) = expected_audience { diff --git a/proxy/src/auth/backend/local.rs b/proxy/src/auth/backend/local.rs index 32e0f536153d..d4273fb52167 100644 --- a/proxy/src/auth/backend/local.rs +++ b/proxy/src/auth/backend/local.rs @@ -29,12 +29,7 @@ impl LocalBackend { api: http::Endpoint::new(compute_ctl, http::new_client()), }, node_info: NodeInfo { - config: { - let mut cfg = ConnCfg::new(); - cfg.host(&postgres_addr.ip().to_string()); - cfg.port(postgres_addr.port()); - cfg - }, + config: ConnCfg::new(postgres_addr.ip().to_string(), postgres_addr.port()), // TODO(conrad): make this better reflect compute info rather than endpoint info. 
aux: MetricsAuxInfo { endpoint_id: EndpointIdTag::get_interner().get_or_intern("local"), diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index 7e1b26a11a0d..1bad7b308623 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -11,8 +11,8 @@ pub use console_redirect::ConsoleRedirectBackend; pub(crate) use console_redirect::ConsoleRedirectError; use ipnet::{Ipv4Net, Ipv6Net}; use local::LocalBackend; +use postgres_client::config::AuthKeys; use tokio::io::{AsyncRead, AsyncWrite}; -use tokio_postgres::config::AuthKeys; use tracing::{debug, info, warn}; use crate::auth::credentials::check_peer_addr_is_in_list; @@ -70,6 +70,10 @@ impl std::fmt::Display for Backend<'_, ()> { fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::ControlPlane(api, ()) => match &**api { + ControlPlaneClient::ProxyV1(endpoint) => fmt + .debug_tuple("ControlPlane::ProxyV1") + .field(&endpoint.url()) + .finish(), ControlPlaneClient::Neon(endpoint) => fmt .debug_tuple("ControlPlane::Neon") .field(&endpoint.url()) diff --git a/proxy/src/auth/flow.rs b/proxy/src/auth/flow.rs index 9c6ce151cba9..60d1962d7f78 100644 --- a/proxy/src/auth/flow.rs +++ b/proxy/src/auth/flow.rs @@ -227,7 +227,7 @@ pub(crate) async fn validate_password_and_exchange( }; Ok(sasl::Outcome::Success(ComputeCredentialKeys::AuthKeys( - tokio_postgres::config::AuthKeys::ScramSha256(keys), + postgres_client::config::AuthKeys::ScramSha256(keys), ))) } } diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index b772a987ee68..99144acef094 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -3,14 +3,6 @@ use std::pin::pin; use std::sync::Arc; use anyhow::bail; -use aws_config::environment::EnvironmentVariableCredentialsProvider; -use aws_config::imds::credentials::ImdsCredentialsProvider; -use aws_config::meta::credentials::CredentialsProviderChain; -use aws_config::meta::region::RegionProviderChain; -use aws_config::profile::ProfileFileCredentialsProvider; -use aws_config::provider_config::ProviderConfig; -use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider; -use aws_config::Region; use futures::future::Either; use proxy::auth::backend::jwt::JwkCache; use proxy::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend, MaybeOwned}; @@ -54,6 +46,9 @@ enum AuthBackendType { #[value(name("console"), alias("cplane"))] ControlPlane, + #[value(name("cplane-v1"), alias("control-plane"))] + ControlPlaneV1, + #[value(name("link"), alias("control-redirect"))] ConsoleRedirect, @@ -314,39 +309,7 @@ async fn main() -> anyhow::Result<()> { }; info!("Using region: {}", args.aws_region); - let region_provider = - RegionProviderChain::default_provider().or_else(Region::new(args.aws_region.clone())); - let provider_conf = - ProviderConfig::without_region().with_region(region_provider.region().await); - let aws_credentials_provider = { - // uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY" - CredentialsProviderChain::first_try("env", EnvironmentVariableCredentialsProvider::new()) - // uses "AWS_PROFILE" / `aws sso login --profile ` - .or_else( - "profile-sso", - ProfileFileCredentialsProvider::builder() - .configure(&provider_conf) - .build(), - ) - // uses "AWS_WEB_IDENTITY_TOKEN_FILE", "AWS_ROLE_ARN", "AWS_ROLE_SESSION_NAME" - // needed to access remote extensions bucket - .or_else( - "token", - WebIdentityTokenCredentialsProvider::builder() - .configure(&provider_conf) - .build(), - ) - // uses imds v2 - .or_else("imds", 
ImdsCredentialsProvider::builder().build()) - }; - let elasticache_credentials_provider = Arc::new(elasticache::CredentialsProvider::new( - elasticache::AWSIRSAConfig::new( - args.aws_region.clone(), - args.redis_cluster_name, - args.redis_user_id, - ), - aws_credentials_provider, - )); + // TODO: untangle the config args let regional_redis_client = match (args.redis_auth_type.as_str(), &args.redis_notifications) { ("plain", redis_url) => match redis_url { None => { @@ -361,7 +324,12 @@ async fn main() -> anyhow::Result<()> { ConnectionWithCredentialsProvider::new_with_credentials_provider( host.to_string(), port, - elasticache_credentials_provider.clone(), + elasticache::CredentialsProvider::new( + args.aws_region, + args.redis_cluster_name, + args.redis_user_id, + ) + .await, ), ), (None, None) => { @@ -553,6 +521,39 @@ async fn main() -> anyhow::Result<()> { .instrument(span), ); } + } else if let proxy::control_plane::client::ControlPlaneClient::ProxyV1(api) = &**api { + match (redis_notifications_client, regional_redis_client.clone()) { + (None, None) => {} + (client1, client2) => { + let cache = api.caches.project_info.clone(); + if let Some(client) = client1 { + maintenance_tasks.spawn(notifications::task_main( + client, + cache.clone(), + cancel_map.clone(), + args.region.clone(), + )); + } + if let Some(client) = client2 { + maintenance_tasks.spawn(notifications::task_main( + client, + cache.clone(), + cancel_map.clone(), + args.region.clone(), + )); + } + maintenance_tasks.spawn(async move { cache.clone().gc_worker().await }); + } + } + if let Some(regional_redis_client) = regional_redis_client { + let cache = api.caches.endpoints_cache.clone(); + let con = regional_redis_client; + let span = tracing::info_span!("endpoints_cache"); + maintenance_tasks.spawn( + async move { cache.do_read(con, cancellation_token.clone()).await } + .instrument(span), + ); + } } } @@ -697,6 +698,65 @@ fn build_auth_backend( args: &ProxyCliArgs, ) -> anyhow::Result, &'static ConsoleRedirectBackend>> { match &args.auth_backend { + AuthBackendType::ControlPlaneV1 => { + let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?; + let project_info_cache_config: ProjectInfoCacheOptions = + args.project_info_cache.parse()?; + let endpoint_cache_config: config::EndpointCacheConfig = + args.endpoint_cache_config.parse()?; + + info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}"); + info!( + "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}" + ); + info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}"); + let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new( + wake_compute_cache_config, + project_info_cache_config, + endpoint_cache_config, + ))); + + let config::ConcurrencyLockOptions { + shards, + limiter, + epoch, + timeout, + } = args.wake_compute_lock.parse()?; + info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)"); + let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new( + "wake_compute_lock", + limiter, + shards, + timeout, + epoch, + &Metrics::get().wake_compute_lock, + )?)); + tokio::spawn(locks.garbage_collect_worker()); + + let url: proxy::url::ApiUrl = args.auth_endpoint.parse()?; + + let endpoint = http::Endpoint::new(url, http::new_client()); + + let mut wake_compute_rps_limit = args.wake_compute_limit.clone(); + RateBucketInfo::validate(&mut wake_compute_rps_limit)?; + let wake_compute_endpoint_rate_limiter = + 
Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit)); + + let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new( + endpoint, + args.control_plane_token.clone(), + caches, + locks, + wake_compute_endpoint_rate_limiter, + ); + + let api = control_plane::client::ControlPlaneClient::ProxyV1(api); + let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ()); + let config = Box::leak(Box::new(auth_backend)); + + Ok(Either::Left(config)) + } + AuthBackendType::ControlPlane => { let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?; let project_info_cache_config: ProjectInfoCacheOptions = @@ -732,13 +792,15 @@ fn build_auth_backend( )?)); tokio::spawn(locks.garbage_collect_worker()); - let url = args.auth_endpoint.parse()?; + let url: proxy::url::ApiUrl = args.auth_endpoint.parse()?; + let endpoint = http::Endpoint::new(url, http::new_client()); let mut wake_compute_rps_limit = args.wake_compute_limit.clone(); RateBucketInfo::validate(&mut wake_compute_rps_limit)?; let wake_compute_endpoint_rate_limiter = Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit)); + let api = control_plane::client::neon::NeonControlPlaneClient::new( endpoint, args.control_plane_token.clone(), diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index 91e198bf88a2..7bc5587a2535 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -3,11 +3,11 @@ use std::sync::Arc; use dashmap::DashMap; use ipnet::{IpNet, Ipv4Net, Ipv6Net}; +use postgres_client::{CancelToken, NoTls}; use pq_proto::CancelKeyData; use thiserror::Error; use tokio::net::TcpStream; use tokio::sync::Mutex; -use tokio_postgres::{CancelToken, NoTls}; use tracing::{debug, info}; use uuid::Uuid; @@ -44,7 +44,7 @@ pub(crate) enum CancelError { IO(#[from] std::io::Error), #[error("{0}")] - Postgres(#[from] tokio_postgres::Error), + Postgres(#[from] postgres_client::Error), #[error("rate limit exceeded")] RateLimit, @@ -70,11 +70,12 @@ impl ReportableError for CancelError { impl CancellationHandler

{ /// Run async action within an ephemeral session identified by [`CancelKeyData`]. pub(crate) fn get_session(self: Arc) -> Session

{ - // HACK: We'd rather get the real backend_pid but tokio_postgres doesn't - // expose it and we don't want to do another roundtrip to query - // for it. The client will be able to notice that this is not the - // actual backend_pid, but backend_pid is not used for anything - // so it doesn't matter. + // we intentionally generate a random "backend pid" and "secret key" here. + // we use the corresponding u64 as an identifier for the + // actual endpoint+pid+secret for postgres/pgbouncer. + // + // if we forwarded the backend_pid from postgres to the client, there would be a lot + // of overlap between our computes as most pids are small (~100). let key = loop { let key = rand::random(); diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 2abe88ac880f..4113b5bb80e3 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -6,13 +6,15 @@ use std::time::Duration; use futures::{FutureExt, TryFutureExt}; use itertools::Itertools; use once_cell::sync::OnceCell; +use postgres_client::tls::MakeTlsConnect; +use postgres_client::{CancelToken, RawConnection}; +use postgres_protocol::message::backend::NoticeResponseBody; use pq_proto::StartupMessageParams; use rustls::client::danger::ServerCertVerifier; use rustls::crypto::ring; use rustls::pki_types::InvalidDnsNameError; use thiserror::Error; use tokio::net::TcpStream; -use tokio_postgres::tls::MakeTlsConnect; use tracing::{debug, error, info, warn}; use crate::auth::parse_endpoint_param; @@ -32,9 +34,9 @@ pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node"; #[derive(Debug, Error)] pub(crate) enum ConnectionError { /// This error doesn't seem to reveal any secrets; for instance, - /// `tokio_postgres::error::Kind` doesn't contain ip addresses and such. + /// `postgres_client::error::Kind` doesn't contain ip addresses and such. #[error("{COULD_NOT_CONNECT}: {0}")] - Postgres(#[from] tokio_postgres::Error), + Postgres(#[from] postgres_client::Error), #[error("{COULD_NOT_CONNECT}: {0}")] CouldNotConnect(#[from] io::Error), @@ -97,18 +99,18 @@ impl ReportableError for ConnectionError { } /// A pair of `ClientKey` & `ServerKey` for `SCRAM-SHA-256`. -pub(crate) type ScramKeys = tokio_postgres::config::ScramKeys<32>; +pub(crate) type ScramKeys = postgres_client::config::ScramKeys<32>; /// A config for establishing a connection to compute node. -/// Eventually, `tokio_postgres` will be replaced with something better. +/// Eventually, `postgres_client` will be replaced with something better. /// Newtype allows us to implement methods on top of it. -#[derive(Clone, Default)] -pub(crate) struct ConnCfg(Box); +#[derive(Clone)] +pub(crate) struct ConnCfg(Box); /// Creation and initialization routines. impl ConnCfg { - pub(crate) fn new() -> Self { - Self::default() + pub(crate) fn new(host: String, port: u16) -> Self { + Self(Box::new(postgres_client::Config::new(host, port))) } /// Reuse password or auth keys from the other config. @@ -122,65 +124,49 @@ impl ConnCfg { } } - pub(crate) fn get_host(&self) -> Result { - match self.0.get_hosts() { - [tokio_postgres::config::Host::Tcp(s)] => Ok(s.into()), - // we should not have multiple address or unix addresses. - _ => Err(WakeComputeError::BadComputeAddress( - "invalid compute address".into(), - )), + pub(crate) fn get_host(&self) -> Host { + match self.0.get_host() { + postgres_client::config::Host::Tcp(s) => s.into(), } } /// Apply startup message params to the connection config. 
- pub(crate) fn set_startup_params(&mut self, params: &StartupMessageParams) { - // Only set `user` if it's not present in the config. - // Console redirect auth flow takes username from the console's response. - if let (None, Some(user)) = (self.get_user(), params.get("user")) { - self.user(user); + pub(crate) fn set_startup_params( + &mut self, + params: &StartupMessageParams, + arbitrary_params: bool, + ) { + if !arbitrary_params { + self.set_param("client_encoding", "UTF8"); } - - // Only set `dbname` if it's not present in the config. - // Console redirect auth flow takes dbname from the console's response. - if let (None, Some(dbname)) = (self.get_dbname(), params.get("database")) { - self.dbname(dbname); - } - - // Don't add `options` if they were only used for specifying a project. - // Connection pools don't support `options`, because they affect backend startup. - if let Some(options) = filtered_options(params) { - self.options(&options); - } - - if let Some(app_name) = params.get("application_name") { - self.application_name(app_name); - } - - // TODO: This is especially ugly... - if let Some(replication) = params.get("replication") { - use tokio_postgres::config::ReplicationMode; - match replication { - "true" | "on" | "yes" | "1" => { - self.replication_mode(ReplicationMode::Physical); + for (k, v) in params.iter() { + match k { + // Only set `user` if it's not present in the config. + // Console redirect auth flow takes username from the console's response. + "user" if self.user_is_set() => continue, + "database" if self.db_is_set() => continue, + "options" => { + if let Some(options) = filtered_options(v) { + self.set_param(k, &options); + } + } + "user" | "database" | "application_name" | "replication" => { + self.set_param(k, v); } - "database" => { - self.replication_mode(ReplicationMode::Logical); + + // if we allow arbitrary params, then we forward them through. + // this is a flag for a period of backwards compatibility + k if arbitrary_params => { + self.set_param(k, v); } - _other => {} + _ => {} } } - - // TODO: extend the list of the forwarded startup parameters. - // Currently, tokio-postgres doesn't allow us to pass - // arbitrary parameters, but the ones above are a good start. - // - // This and the reverse params problem can be better addressed - // in a bespoke connection machinery (a new library for that sake). } } impl std::ops::Deref for ConnCfg { - type Target = tokio_postgres::Config; + type Target = postgres_client::Config; fn deref(&self) -> &Self::Target { &self.0 @@ -197,7 +183,7 @@ impl std::ops::DerefMut for ConnCfg { impl ConnCfg { /// Establish a raw TCP connection to the compute node. async fn connect_raw(&self, timeout: Duration) -> io::Result<(SocketAddr, TcpStream, &str)> { - use tokio_postgres::config::Host; + use postgres_client::config::Host; // wrap TcpStream::connect with timeout let connect_with_timeout = |host, port| { @@ -222,46 +208,23 @@ impl ConnCfg { }) }; - // We can't reuse connection establishing logic from `tokio_postgres` here, + // We can't reuse connection establishing logic from `postgres_client` here, // because it has no means for extracting the underlying socket which we // require for our business. 
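[Aside on the `set_startup_params` rewrite above.] The new code replaces per-key plumbing with a single pass over the startup parameters, gated by a compat flag. A reduced sketch of that forwarding rule, using plain string pairs instead of `postgres_client::Config` and eliding the `options` filtering and the user/database "already set" checks; names here are illustrative:

```rust
fn forward(params: &[(&str, &str)], arbitrary_params: bool) -> Vec<(String, String)> {
    let mut out = Vec::new();
    if !arbitrary_params {
        // compat mode off: pin the encoding, like the PR does
        out.push(("client_encoding".into(), "UTF8".into()));
    }
    for &(k, v) in params {
        match k {
            // always forwarded (the PR also skips user/database if preset)
            "user" | "database" | "application_name" | "replication" => {
                out.push((k.into(), v.into()));
            }
            // everything else passes through only in the backwards-compat mode
            _ if arbitrary_params => out.push((k.into(), v.into())),
            _ => {}
        }
    }
    out
}

fn main() {
    let params = [("application_name", "psql"), ("TimeZone", "UTC")];
    assert_eq!(forward(&params, false).len(), 2); // client_encoding + application_name
    assert_eq!(forward(&params, true).len(), 2); // application_name + TimeZone
}
```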
- let mut connection_error = None; - let ports = self.0.get_ports(); - let hosts = self.0.get_hosts(); - // the ports array is supposed to have 0 entries, 1 entry, or as many entries as in the hosts array - if ports.len() > 1 && ports.len() != hosts.len() { - return Err(io::Error::new( - io::ErrorKind::Other, - format!( - "bad compute config, \ - ports and hosts entries' count does not match: {:?}", - self.0 - ), - )); - } + let port = self.0.get_port(); + let host = self.0.get_host(); - for (i, host) in hosts.iter().enumerate() { - let port = ports.get(i).or_else(|| ports.first()).unwrap_or(&5432); - let host = match host { - Host::Tcp(host) => host.as_str(), - }; - - match connect_once(host, *port).await { - Ok((sockaddr, stream)) => return Ok((sockaddr, stream, host)), - Err(err) => { - // We can't throw an error here, as there might be more hosts to try. - warn!("couldn't connect to compute node at {host}:{port}: {err}"); - connection_error = Some(err); - } + let host = match host { + Host::Tcp(host) => host.as_str(), + }; + + match connect_once(host, port).await { + Ok((sockaddr, stream)) => Ok((sockaddr, stream, host)), + Err(err) => { + warn!("couldn't connect to compute node at {host}:{port}: {err}"); + Err(err) } } - - Err(connection_error.unwrap_or_else(|| { - io::Error::new( - io::ErrorKind::Other, - format!("bad compute config: {:?}", self.0), - ) - })) } } @@ -270,13 +233,15 @@ type RustlsStream = > pub(crate) struct PostgresConnection { /// Socket connected to a compute node. pub(crate) stream: - tokio_postgres::maybe_tls_stream::MaybeTlsStream, + postgres_client::maybe_tls_stream::MaybeTlsStream, /// PostgreSQL connection parameters. pub(crate) params: std::collections::HashMap, /// Query cancellation token. pub(crate) cancel_closure: CancelClosure, /// Labels for proxy's metrics. pub(crate) aux: MetricsAuxInfo, + /// Notices received from compute after authenticating + pub(crate) delayed_notice: Vec, _guage: NumDbConnectionsGuard<'static>, } @@ -322,10 +287,19 @@ impl ConnCfg { // connect_raw() will not use TLS if sslmode is "disable" let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); - let (client, connection) = self.0.connect_raw(stream, tls).await?; + let connection = self.0.connect_raw(stream, tls).await?; drop(pause); - tracing::Span::current().record("pid", tracing::field::display(client.get_process_id())); - let stream = connection.stream.into_inner(); + + let RawConnection { + stream, + parameters, + delayed_notice, + process_id, + secret_key, + } = connection; + + tracing::Span::current().record("pid", tracing::field::display(process_id)); + let stream = stream.into_inner(); // TODO: lots of useful info but maybe we can move it elsewhere (eg traces?) info!( @@ -334,18 +308,23 @@ impl ConnCfg { self.0.get_ssl_mode() ); - // This is very ugly but as of now there's no better way to - // extract the connection parameters from tokio-postgres' connection. - // TODO: solve this problem in a more elegant manner (e.g. the new library). - let params = connection.parameters; - // NB: CancelToken is supposed to hold socket_addr, but we use connect_raw. // Yet another reason to rework the connection establishing code. 
- let cancel_closure = CancelClosure::new(socket_addr, client.cancel_token(), vec![]); + let cancel_closure = CancelClosure::new( + socket_addr, + CancelToken { + socket_config: None, + ssl_mode: self.0.get_ssl_mode(), + process_id, + secret_key, + }, + vec![], + ); let connection = PostgresConnection { stream, - params, + params: parameters, + delayed_notice, cancel_closure, aux, _guage: Metrics::get().proxy.db_connections.guard(ctx.protocol()), @@ -356,10 +335,9 @@ impl ConnCfg { } /// Retrieve `options` from a startup message, dropping all proxy-secific flags. -fn filtered_options(params: &StartupMessageParams) -> Option { +fn filtered_options(options: &str) -> Option { #[allow(unstable_name_collisions)] - let options: String = params - .options_raw()? + let options: String = StartupMessageParams::parse_options_raw(options) .filter(|opt| parse_endpoint_param(opt).is_none() && neon_option(opt).is_none()) .intersperse(" ") // TODO: use impl from std once it's stabilized .collect(); @@ -436,27 +414,24 @@ mod tests { #[test] fn test_filtered_options() { // Empty options is unlikely to be useful anyway. - let params = StartupMessageParams::new([("options", "")]); - assert_eq!(filtered_options(¶ms), None); + let params = ""; + assert_eq!(filtered_options(params), None); // It's likely that clients will only use options to specify endpoint/project. - let params = StartupMessageParams::new([("options", "project=foo")]); - assert_eq!(filtered_options(¶ms), None); + let params = "project=foo"; + assert_eq!(filtered_options(params), None); // Same, because unescaped whitespaces are no-op. - let params = StartupMessageParams::new([("options", " project=foo ")]); - assert_eq!(filtered_options(¶ms).as_deref(), None); + let params = " project=foo "; + assert_eq!(filtered_options(params).as_deref(), None); - let params = StartupMessageParams::new([("options", r"\ project=foo \ ")]); - assert_eq!(filtered_options(¶ms).as_deref(), Some(r"\ \ ")); + let params = r"\ project=foo \ "; + assert_eq!(filtered_options(params).as_deref(), Some(r"\ \ ")); - let params = StartupMessageParams::new([("options", "project = foo")]); - assert_eq!(filtered_options(¶ms).as_deref(), Some("project = foo")); + let params = "project = foo"; + assert_eq!(filtered_options(params).as_deref(), Some("project = foo")); - let params = StartupMessageParams::new([( - "options", - "project = foo neon_endpoint_type:read_write neon_lsn:0/2", - )]); - assert_eq!(filtered_options(¶ms).as_deref(), Some("project = foo")); + let params = "project = foo neon_endpoint_type:read_write neon_lsn:0/2 neon_proxy_params_compat:true"; + assert_eq!(filtered_options(params).as_deref(), Some("project = foo")); } } diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index 8f78df19649b..7db1179eeae8 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -206,6 +206,7 @@ pub(crate) async fn handle_client( let mut node = connect_to_compute( ctx, &TcpMechanism { + params_compat: true, params: ¶ms, locks: &config.connect_compute_locks, }, diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index 4a063a5faa15..a9fb513d3ceb 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -57,6 +57,7 @@ struct RequestContextInner { application: Option, error_kind: Option, pub(crate) auth_method: Option, + jwt_issuer: Option, success: bool, pub(crate) cold_start_info: ColdStartInfo, pg_options: Option, @@ -79,6 +80,7 @@ pub(crate) enum AuthMethod { ScramSha256, 
ScramSha256Plus, Cleartext, + Jwt, } impl Clone for RequestContext { @@ -100,6 +102,7 @@ impl Clone for RequestContext { application: inner.application.clone(), error_kind: inner.error_kind, auth_method: inner.auth_method.clone(), + jwt_issuer: inner.jwt_issuer.clone(), success: inner.success, rejected: inner.rejected, cold_start_info: inner.cold_start_info, @@ -148,6 +151,7 @@ impl RequestContext { application: None, error_kind: None, auth_method: None, + jwt_issuer: None, success: false, rejected: None, cold_start_info: ColdStartInfo::Unknown, @@ -246,6 +250,11 @@ impl RequestContext { this.auth_method = Some(auth_method); } + pub(crate) fn set_jwt_issuer(&self, jwt_issuer: String) { + let mut this = self.0.try_lock().expect("should not deadlock"); + this.jwt_issuer = Some(jwt_issuer); + } + pub fn has_private_peer_addr(&self) -> bool { self.0 .try_lock() diff --git a/proxy/src/context/parquet.rs b/proxy/src/context/parquet.rs index b375eb886e09..3105d085260d 100644 --- a/proxy/src/context/parquet.rs +++ b/proxy/src/context/parquet.rs @@ -87,6 +87,8 @@ pub(crate) struct RequestData { branch: Option, pg_options: Option, auth_method: Option<&'static str>, + jwt_issuer: Option, + error: Option<&'static str>, /// Success is counted if we form a HTTP response with sql rows inside /// Or if we make it to proxy_pass @@ -138,7 +140,9 @@ impl From<&RequestContextInner> for RequestData { super::AuthMethod::ScramSha256 => "scram_sha_256", super::AuthMethod::ScramSha256Plus => "scram_sha_256_plus", super::AuthMethod::Cleartext => "cleartext", + super::AuthMethod::Jwt => "jwt", }), + jwt_issuer: value.jwt_issuer.clone(), protocol: value.protocol.as_str(), region: value.region, error: value.error_kind.as_ref().map(|e| e.to_metric_label()), @@ -519,6 +523,7 @@ mod tests { branch: Some(hex::encode(rng.gen::<[u8; 16]>())), pg_options: None, auth_method: None, + jwt_issuer: None, protocol: ["tcp", "ws", "http"][rng.gen_range(0..3)], region: "us-east-1", error: None, @@ -599,15 +604,15 @@ mod tests { assert_eq!( file_stats, [ - (1312632, 3, 6000), - (1312621, 3, 6000), - (1312680, 3, 6000), - (1312637, 3, 6000), - (1312773, 3, 6000), - (1312610, 3, 6000), - (1312404, 3, 6000), - (1312639, 3, 6000), - (437848, 1, 2000) + (1313105, 3, 6000), + (1313094, 3, 6000), + (1313153, 3, 6000), + (1313110, 3, 6000), + (1313246, 3, 6000), + (1313083, 3, 6000), + (1312877, 3, 6000), + (1313112, 3, 6000), + (438020, 1, 2000) ] ); @@ -639,11 +644,11 @@ mod tests { assert_eq!( file_stats, [ - (1203465, 5, 10000), - (1203189, 5, 10000), - (1203490, 5, 10000), - (1203475, 5, 10000), - (1203729, 5, 10000) + (1204324, 5, 10000), + (1204048, 5, 10000), + (1204349, 5, 10000), + (1204334, 5, 10000), + (1204588, 5, 10000) ] ); @@ -668,15 +673,15 @@ mod tests { assert_eq!( file_stats, [ - (1312632, 3, 6000), - (1312621, 3, 6000), - (1312680, 3, 6000), - (1312637, 3, 6000), - (1312773, 3, 6000), - (1312610, 3, 6000), - (1312404, 3, 6000), - (1312639, 3, 6000), - (437848, 1, 2000) + (1313105, 3, 6000), + (1313094, 3, 6000), + (1313153, 3, 6000), + (1313110, 3, 6000), + (1313246, 3, 6000), + (1313083, 3, 6000), + (1312877, 3, 6000), + (1313112, 3, 6000), + (438020, 1, 2000) ] ); @@ -713,7 +718,7 @@ mod tests { // files are smaller than the size threshold, but they took too long to fill so were flushed early assert_eq!( file_stats, - [(657696, 2, 3001), (657410, 2, 3000), (657206, 2, 2999)] + [(658014, 2, 3001), (657728, 2, 3000), (657524, 2, 2999)] ); tmpdir.close().unwrap(); diff --git 
a/proxy/src/control_plane/client/cplane_proxy_v1.rs b/proxy/src/control_plane/client/cplane_proxy_v1.rs new file mode 100644 index 000000000000..e33a37f64366 --- /dev/null +++ b/proxy/src/control_plane/client/cplane_proxy_v1.rs @@ -0,0 +1,514 @@ +//! Production console backend. + +use std::sync::Arc; +use std::time::Duration; + +use ::http::header::AUTHORIZATION; +use ::http::HeaderName; +use futures::TryFutureExt; +use postgres_client::config::SslMode; +use tokio::time::Instant; +use tracing::{debug, info, info_span, warn, Instrument}; + +use super::super::messages::{ControlPlaneErrorMessage, GetEndpointAccessControl, WakeCompute}; +use crate::auth::backend::jwt::AuthRule; +use crate::auth::backend::ComputeUserInfo; +use crate::cache::Cached; +use crate::context::RequestContext; +use crate::control_plane::caches::ApiCaches; +use crate::control_plane::errors::{ + ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError, +}; +use crate::control_plane::locks::ApiLocks; +use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, Reason}; +use crate::control_plane::{ + AuthInfo, AuthSecret, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, NodeInfo, +}; +use crate::metrics::{CacheOutcome, Metrics}; +use crate::rate_limiter::WakeComputeRateLimiter; +use crate::types::{EndpointCacheKey, EndpointId}; +use crate::{compute, http, scram}; + +const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id"); + +#[derive(Clone)] +pub struct NeonControlPlaneClient { + endpoint: http::Endpoint, + pub caches: &'static ApiCaches, + pub(crate) locks: &'static ApiLocks, + pub(crate) wake_compute_endpoint_rate_limiter: Arc, + // put in a shared ref so we don't copy secrets all over in memory + jwt: Arc, +} + +impl NeonControlPlaneClient { + /// Construct an API object containing the auth parameters. + pub fn new( + endpoint: http::Endpoint, + jwt: Arc, + caches: &'static ApiCaches, + locks: &'static ApiLocks, + wake_compute_endpoint_rate_limiter: Arc, + ) -> Self { + Self { + endpoint, + caches, + locks, + wake_compute_endpoint_rate_limiter, + jwt, + } + } + + pub(crate) fn url(&self) -> &str { + self.endpoint.url().as_str() + } + + async fn do_get_auth_info( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result { + if !self + .caches + .endpoints_cache + .is_valid(ctx, &user_info.endpoint.normalize()) + { + // TODO: refactor this because it's weird + // this is a failure to authenticate but we return Ok. + info!("endpoint is not valid, skipping the request"); + return Ok(AuthInfo::default()); + } + let request_id = ctx.session_id().to_string(); + let application_name = ctx.console_application_name(); + async { + let request = self + .endpoint + .get_path("get_endpoint_access_control") + .header(X_REQUEST_ID, &request_id) + .header(AUTHORIZATION, format!("Bearer {}", &self.jwt)) + .query(&[("session_id", ctx.session_id())]) + .query(&[ + ("application_name", application_name.as_str()), + ("endpointish", user_info.endpoint.as_str()), + ("role", user_info.user.as_str()), + ]) + .build()?; + + debug!(url = request.url().as_str(), "sending http request"); + let start = Instant::now(); + let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); + let response = self.endpoint.execute(request).await?; + drop(pause); + info!(duration = ?start.elapsed(), "received http response"); + let body = match parse_body::(response).await { + Ok(body) => body, + // Error 404 is special: it's ok not to have a secret. 
+ // TODO(anna): retry + Err(e) => { + return if e.get_reason().is_not_found() { + // TODO: refactor this because it's weird + // this is a failure to authenticate but we return Ok. + Ok(AuthInfo::default()) + } else { + Err(e.into()) + }; + } + }; + + // Ivan: don't know where it will be used, so I leave it here + let _endpoint_vpc_ids = body.allowed_vpc_endpoint_ids.unwrap_or_default(); + + let secret = if body.role_secret.is_empty() { + None + } else { + let secret = scram::ServerSecret::parse(&body.role_secret) + .map(AuthSecret::Scram) + .ok_or(GetAuthInfoError::BadSecret)?; + Some(secret) + }; + let allowed_ips = body.allowed_ips.unwrap_or_default(); + Metrics::get() + .proxy + .allowed_ips_number + .observe(allowed_ips.len() as f64); + Ok(AuthInfo { + secret, + allowed_ips, + project_id: body.project_id, + }) + } + .inspect_err(|e| tracing::debug!(error = ?e)) + .instrument(info_span!("do_get_auth_info")) + .await + } + + async fn do_get_endpoint_jwks( + &self, + ctx: &RequestContext, + endpoint: EndpointId, + ) -> Result, GetEndpointJwksError> { + if !self + .caches + .endpoints_cache + .is_valid(ctx, &endpoint.normalize()) + { + return Err(GetEndpointJwksError::EndpointNotFound); + } + let request_id = ctx.session_id().to_string(); + async { + let request = self + .endpoint + .get_with_url(|url| { + url.path_segments_mut() + .push("endpoints") + .push(endpoint.as_str()) + .push("jwks"); + }) + .header(X_REQUEST_ID, &request_id) + .header(AUTHORIZATION, format!("Bearer {}", &self.jwt)) + .query(&[("session_id", ctx.session_id())]) + .build() + .map_err(GetEndpointJwksError::RequestBuild)?; + + debug!(url = request.url().as_str(), "sending http request"); + let start = Instant::now(); + let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); + let response = self + .endpoint + .execute(request) + .await + .map_err(GetEndpointJwksError::RequestExecute)?; + drop(pause); + info!(duration = ?start.elapsed(), "received http response"); + + let body = parse_body::(response).await?; + + let rules = body + .jwks + .into_iter() + .map(|jwks| AuthRule { + id: jwks.id, + jwks_url: jwks.jwks_url, + audience: jwks.jwt_audience, + role_names: jwks.role_names, + }) + .collect(); + + Ok(rules) + } + .inspect_err(|e| tracing::debug!(error = ?e)) + .instrument(info_span!("do_get_endpoint_jwks")) + .await + } + + async fn do_wake_compute( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result { + let request_id = ctx.session_id().to_string(); + let application_name = ctx.console_application_name(); + async { + let mut request_builder = self + .endpoint + .get_path("wake_compute") + .header("X-Request-ID", &request_id) + .header("Authorization", format!("Bearer {}", &self.jwt)) + .query(&[("session_id", ctx.session_id())]) + .query(&[ + ("application_name", application_name.as_str()), + ("endpointish", user_info.endpoint.as_str()), + ]); + + let options = user_info.options.to_deep_object(); + if !options.is_empty() { + request_builder = request_builder.query(&options); + } + + let request = request_builder.build()?; + + debug!(url = request.url().as_str(), "sending http request"); + let start = Instant::now(); + let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); + let response = self.endpoint.execute(request).await?; + drop(pause); + info!(duration = ?start.elapsed(), "received http response"); + let body = parse_body::(response).await?; + + // Unfortunately, ownership won't let us use `Option::ok_or` here. 
+ let (host, port) = match parse_host_port(&body.address) { + None => return Err(WakeComputeError::BadComputeAddress(body.address)), + Some(x) => x, + }; + + // Don't set anything but host and port! This config will be cached. + // We'll set username and such later using the startup message. + // TODO: add more type safety (in progress). + let mut config = compute::ConnCfg::new(host.to_owned(), port); + config.ssl_mode(SslMode::Disable); // TLS is not configured on compute nodes. + + let node = NodeInfo { + config, + aux: body.aux, + allow_self_signed_compute: false, + }; + + Ok(node) + } + .inspect_err(|e| tracing::debug!(error = ?e)) + .instrument(info_span!("do_wake_compute")) + .await + } +} + +impl super::ControlPlaneApi for NeonControlPlaneClient { + #[tracing::instrument(skip_all)] + async fn get_role_secret( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result { + let normalized_ep = &user_info.endpoint.normalize(); + let user = &user_info.user; + if let Some(role_secret) = self + .caches + .project_info + .get_role_secret(normalized_ep, user) + { + return Ok(role_secret); + } + let auth_info = self.do_get_auth_info(ctx, user_info).await?; + if let Some(project_id) = auth_info.project_id { + let normalized_ep_int = normalized_ep.into(); + self.caches.project_info.insert_role_secret( + project_id, + normalized_ep_int, + user.into(), + auth_info.secret.clone(), + ); + self.caches.project_info.insert_allowed_ips( + project_id, + normalized_ep_int, + Arc::new(auth_info.allowed_ips), + ); + ctx.set_project_id(project_id); + } + // When we just got a secret, we don't need to invalidate it. + Ok(Cached::new_uncached(auth_info.secret)) + } + + async fn get_allowed_ips_and_secret( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { + let normalized_ep = &user_info.endpoint.normalize(); + if let Some(allowed_ips) = self.caches.project_info.get_allowed_ips(normalized_ep) { + Metrics::get() + .proxy + .allowed_ips_cache_misses + .inc(CacheOutcome::Hit); + return Ok((allowed_ips, None)); + } + Metrics::get() + .proxy + .allowed_ips_cache_misses + .inc(CacheOutcome::Miss); + let auth_info = self.do_get_auth_info(ctx, user_info).await?; + let allowed_ips = Arc::new(auth_info.allowed_ips); + let user = &user_info.user; + if let Some(project_id) = auth_info.project_id { + let normalized_ep_int = normalized_ep.into(); + self.caches.project_info.insert_role_secret( + project_id, + normalized_ep_int, + user.into(), + auth_info.secret.clone(), + ); + self.caches.project_info.insert_allowed_ips( + project_id, + normalized_ep_int, + allowed_ips.clone(), + ); + ctx.set_project_id(project_id); + } + Ok(( + Cached::new_uncached(allowed_ips), + Some(Cached::new_uncached(auth_info.secret)), + )) + } + + #[tracing::instrument(skip_all)] + async fn get_endpoint_jwks( + &self, + ctx: &RequestContext, + endpoint: EndpointId, + ) -> Result, GetEndpointJwksError> { + self.do_get_endpoint_jwks(ctx, endpoint).await + } + + #[tracing::instrument(skip_all)] + async fn wake_compute( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result { + let key = user_info.endpoint_cache_key(); + + macro_rules! 
check_cache { + () => { + if let Some(cached) = self.caches.node_info.get(&key) { + let (cached, info) = cached.take_value(); + let info = info.map_err(|c| { + info!(key = &*key, "found cached wake_compute error"); + WakeComputeError::ControlPlane(ControlPlaneError::Message(Box::new(*c))) + })?; + + debug!(key = &*key, "found cached compute node info"); + ctx.set_project(info.aux.clone()); + return Ok(cached.map(|()| info)); + } + }; + } + + // Every time we do a wakeup http request, the compute node will stay up + // for some time (highly depends on the console's scale-to-zero policy); + // The connection info remains the same during that period of time, + // which means that we might cache it to reduce the load and latency. + check_cache!(); + + let permit = self.locks.get_permit(&key).await?; + + // after getting back a permit - it's possible the cache was filled + // double check + if permit.should_check_cache() { + // TODO: if there is something in the cache, mark the permit as success. + check_cache!(); + } + + // check rate limit + if !self + .wake_compute_endpoint_rate_limiter + .check(user_info.endpoint.normalize_intern(), 1) + { + return Err(WakeComputeError::TooManyConnections); + } + + let node = permit.release_result(self.do_wake_compute(ctx, user_info).await); + match node { + Ok(node) => { + ctx.set_project(node.aux.clone()); + debug!(key = &*key, "created a cache entry for woken compute node"); + + let mut stored_node = node.clone(); + // store the cached node as 'warm_cached' + stored_node.aux.cold_start_info = ColdStartInfo::WarmCached; + + let (_, cached) = self.caches.node_info.insert_unit(key, Ok(stored_node)); + + Ok(cached.map(|()| node)) + } + Err(err) => match err { + WakeComputeError::ControlPlane(ControlPlaneError::Message(err)) => { + let Some(status) = &err.status else { + return Err(WakeComputeError::ControlPlane(ControlPlaneError::Message( + err, + ))); + }; + + let reason = status + .details + .error_info + .map_or(Reason::Unknown, |x| x.reason); + + // if we can retry this error, do not cache it. + if reason.can_retry() { + return Err(WakeComputeError::ControlPlane(ControlPlaneError::Message( + err, + ))); + } + + // at this point, we should only have quota errors. + debug!( + key = &*key, + "created a cache entry for the wake compute error" + ); + + self.caches.node_info.insert_ttl( + key, + Err(err.clone()), + Duration::from_secs(30), + ); + + Err(WakeComputeError::ControlPlane(ControlPlaneError::Message( + err, + ))) + } + err => return Err(err), + }, + } + } +} + +/// Parse http response body, taking status code into account. +async fn parse_body serde::Deserialize<'a>>( + response: http::Response, +) -> Result { + let status = response.status(); + if status.is_success() { + // We shouldn't log raw body because it may contain secrets. + info!("request succeeded, processing the body"); + return Ok(response.json().await?); + } + let s = response.bytes().await?; + // Log plaintext to be able to detect, whether there are some cases not covered by the error struct. + info!("response_error plaintext: {:?}", s); + + // Don't throw an error here because it's not as important + // as the fact that the request itself has failed. 
+ let mut body = serde_json::from_slice(&s).unwrap_or_else(|e| { + warn!("failed to parse error body: {e}"); + ControlPlaneErrorMessage { + error: "reason unclear (malformed error message)".into(), + http_status_code: status, + status: None, + } + }); + body.http_status_code = status; + + warn!("console responded with an error ({status}): {body:?}"); + Err(ControlPlaneError::Message(Box::new(body))) +} + +fn parse_host_port(input: &str) -> Option<(&str, u16)> { + let (host, port) = input.rsplit_once(':')?; + let ipv6_brackets: &[_] = &['[', ']']; + Some((host.trim_matches(ipv6_brackets), port.parse().ok()?)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_host_port_v4() { + let (host, port) = parse_host_port("127.0.0.1:5432").expect("failed to parse"); + assert_eq!(host, "127.0.0.1"); + assert_eq!(port, 5432); + } + + #[test] + fn test_parse_host_port_v6() { + let (host, port) = parse_host_port("[2001:db8::1]:5432").expect("failed to parse"); + assert_eq!(host, "2001:db8::1"); + assert_eq!(port, 5432); + } + + #[test] + fn test_parse_host_port_url() { + let (host, port) = parse_host_port("compute-foo-bar-1234.default.svc.cluster.local:5432") + .expect("failed to parse"); + assert_eq!(host, "compute-foo-bar-1234.default.svc.cluster.local"); + assert_eq!(port, 5432); + } +} diff --git a/proxy/src/control_plane/client/mock.rs b/proxy/src/control_plane/client/mock.rs index 9537d717a1f1..eaf692ab279b 100644 --- a/proxy/src/control_plane/client/mock.rs +++ b/proxy/src/control_plane/client/mock.rs @@ -5,7 +5,6 @@ use std::sync::Arc; use futures::TryFutureExt; use thiserror::Error; -use tokio_postgres::config::SslMode; use tokio_postgres::Client; use tracing::{error, info, info_span, warn, Instrument}; @@ -161,11 +160,11 @@ impl MockControlPlane { } async fn do_wake_compute(&self) -> Result { - let mut config = compute::ConnCfg::new(); - config - .host(self.endpoint.host_str().unwrap_or("localhost")) - .port(self.endpoint.port().unwrap_or(5432)) - .ssl_mode(SslMode::Disable); + let mut config = compute::ConnCfg::new( + self.endpoint.host_str().unwrap_or("localhost").to_owned(), + self.endpoint.port().unwrap_or(5432), + ); + config.ssl_mode(postgres_client::config::SslMode::Disable); let node = NodeInfo { config, diff --git a/proxy/src/control_plane/client/mod.rs b/proxy/src/control_plane/client/mod.rs index f8f74372f0a2..7ef5a9c9fd68 100644 --- a/proxy/src/control_plane/client/mod.rs +++ b/proxy/src/control_plane/client/mod.rs @@ -1,3 +1,4 @@ +pub mod cplane_proxy_v1; #[cfg(any(test, feature = "testing"))] pub mod mock; pub mod neon; @@ -27,6 +28,8 @@ use crate::types::EndpointId; #[non_exhaustive] #[derive(Clone)] pub enum ControlPlaneClient { + /// New Proxy V1 control plane API + ProxyV1(cplane_proxy_v1::NeonControlPlaneClient), /// Current Management API (V2). Neon(neon::NeonControlPlaneClient), /// Local mock control plane. 
@@ -45,6 +48,7 @@ impl ControlPlaneApi for ControlPlaneClient {
         user_info: &ComputeUserInfo,
     ) -> Result<CachedRoleSecret, errors::GetAuthInfoError> {
         match self {
+            Self::ProxyV1(api) => api.get_role_secret(ctx, user_info).await,
             Self::Neon(api) => api.get_role_secret(ctx, user_info).await,
             #[cfg(any(test, feature = "testing"))]
             Self::PostgresMock(api) => api.get_role_secret(ctx, user_info).await,
@@ -61,6 +65,7 @@ impl ControlPlaneApi for ControlPlaneClient {
         user_info: &ComputeUserInfo,
     ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError> {
         match self {
+            Self::ProxyV1(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
             Self::Neon(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
             #[cfg(any(test, feature = "testing"))]
             Self::PostgresMock(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
@@ -75,6 +80,7 @@ impl ControlPlaneApi for ControlPlaneClient {
         endpoint: EndpointId,
     ) -> Result<Vec<AuthRule>, errors::GetEndpointJwksError> {
         match self {
+            Self::ProxyV1(api) => api.get_endpoint_jwks(ctx, endpoint).await,
             Self::Neon(api) => api.get_endpoint_jwks(ctx, endpoint).await,
             #[cfg(any(test, feature = "testing"))]
             Self::PostgresMock(api) => api.get_endpoint_jwks(ctx, endpoint).await,
@@ -89,6 +95,7 @@ impl ControlPlaneApi for ControlPlaneClient {
         user_info: &ComputeUserInfo,
    ) -> Result<CachedNodeInfo, errors::WakeComputeError> {
         match self {
+            Self::ProxyV1(api) => api.wake_compute(ctx, user_info).await,
             Self::Neon(api) => api.wake_compute(ctx, user_info).await,
             #[cfg(any(test, feature = "testing"))]
             Self::PostgresMock(api) => api.wake_compute(ctx, user_info).await,
diff --git a/proxy/src/control_plane/client/neon.rs b/proxy/src/control_plane/client/neon.rs
index 2cad981d01ac..bf62c0d6abd3 100644
--- a/proxy/src/control_plane/client/neon.rs
+++ b/proxy/src/control_plane/client/neon.rs
@@ -1,4 +1,4 @@
-//! Production console backend.
+//! Stale console backend, remove after migrating to Proxy V1 API (#15245).
 
 use std::sync::Arc;
 use std::time::Duration;
@@ -6,8 +6,8 @@ use std::time::Duration;
 use ::http::header::AUTHORIZATION;
 use ::http::HeaderName;
 use futures::TryFutureExt;
+use postgres_client::config::SslMode;
 use tokio::time::Instant;
-use tokio_postgres::config::SslMode;
 use tracing::{debug, info, info_span, warn, Instrument};
 
 use super::super::messages::{ControlPlaneErrorMessage, GetRoleSecret, WakeCompute};
@@ -241,8 +241,8 @@ impl NeonControlPlaneClient {
         // Don't set anything but host and port! This config will be cached.
         // We'll set username and such later using the startup message.
         // TODO: add more type safety (in progress).
-        let mut config = compute::ConnCfg::new();
-        config.host(host).port(port).ssl_mode(SslMode::Disable); // TLS is not configured on compute nodes.
+        let mut config = compute::ConnCfg::new(host.to_owned(), port);
+        config.ssl_mode(SslMode::Disable); // TLS is not configured on compute nodes.
 
         let node = NodeInfo {
             config,
diff --git a/proxy/src/control_plane/messages.rs b/proxy/src/control_plane/messages.rs
index 8762ba874bdf..2662ab85f96f 100644
--- a/proxy/src/control_plane/messages.rs
+++ b/proxy/src/control_plane/messages.rs
@@ -230,6 +230,16 @@ pub(crate) struct GetRoleSecret {
     pub(crate) project_id: Option<ProjectIdInt>,
 }
 
+/// Response which holds client's auth secret, e.g. [`crate::scram::ServerSecret`].
+/// Returned by the `/get_endpoint_access_control` API method.
+#[derive(Deserialize)]
+pub(crate) struct GetEndpointAccessControl {
+    pub(crate) role_secret: Box<str>,
+    pub(crate) allowed_ips: Option<Vec<IpPattern>>,
+    pub(crate) project_id: Option<ProjectIdInt>,
+    pub(crate) allowed_vpc_endpoint_ids: Option<Vec<String>>,
+}
+
 // Manually implement debug to omit sensitive info.
 impl fmt::Debug for GetRoleSecret {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
diff --git a/proxy/src/error.rs b/proxy/src/error.rs
index 2221aac407fc..6a379499dc62 100644
--- a/proxy/src/error.rs
+++ b/proxy/src/error.rs
@@ -84,7 +84,7 @@ pub(crate) trait ReportableError: fmt::Display + Send + 'static {
     fn get_error_kind(&self) -> ErrorKind;
 }
 
-impl ReportableError for tokio_postgres::error::Error {
+impl ReportableError for postgres_client::error::Error {
     fn get_error_kind(&self) -> ErrorKind {
         if self.as_db_error().is_some() {
             ErrorKind::Postgres
diff --git a/proxy/src/postgres_rustls/mod.rs b/proxy/src/postgres_rustls/mod.rs
index 31e7915e89fd..5ef20991c309 100644
--- a/proxy/src/postgres_rustls/mod.rs
+++ b/proxy/src/postgres_rustls/mod.rs
@@ -1,10 +1,10 @@
 use std::convert::TryFrom;
 use std::sync::Arc;
 
+use postgres_client::tls::MakeTlsConnect;
 use rustls::pki_types::ServerName;
 use rustls::ClientConfig;
 use tokio::io::{AsyncRead, AsyncWrite};
-use tokio_postgres::tls::MakeTlsConnect;
 
 mod private {
     use std::future::Future;
@@ -12,9 +12,9 @@ mod private {
     use std::pin::Pin;
     use std::task::{Context, Poll};
 
+    use postgres_client::tls::{ChannelBinding, TlsConnect};
     use rustls::pki_types::ServerName;
     use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
-    use tokio_postgres::tls::{ChannelBinding, TlsConnect};
     use tokio_rustls::client::TlsStream;
     use tokio_rustls::TlsConnector;
 
@@ -59,7 +59,7 @@ mod private {
 
     pub struct RustlsStream<S>(TlsStream<S>);
 
-    impl<S> tokio_postgres::tls::TlsStream for RustlsStream<S>
+    impl<S> postgres_client::tls::TlsStream for RustlsStream<S>
     where
         S: AsyncRead + AsyncWrite + Unpin,
     {
diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs
index 2e759b0894a2..a3027abd7cae 100644
--- a/proxy/src/proxy/connect_compute.rs
+++ b/proxy/src/proxy/connect_compute.rs
@@ -66,6 +66,8 @@ pub(crate) trait ComputeConnectBackend {
 }
 
 pub(crate) struct TcpMechanism<'a> {
+    pub(crate) params_compat: bool,
+
     /// KV-dictionary with PostgreSQL connection params.
     pub(crate) params: &'a StartupMessageParams,
 
@@ -86,13 +88,13 @@ impl ConnectMechanism for TcpMechanism<'_> {
         node_info: &control_plane::CachedNodeInfo,
         timeout: time::Duration,
     ) -> Result<Self::Connection, Self::ConnectError> {
-        let host = node_info.config.get_host()?;
+        let host = node_info.config.get_host();
         let permit = self.locks.get_permit(&host).await?;
         permit.release_result(node_info.connect(ctx, timeout).await)
     }
 
     fn update_connect_config(&self, config: &mut compute::ConnCfg) {
-        config.set_startup_params(self.params);
+        config.set_startup_params(self.params, self.params_compat);
     }
 }
diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs
index 956036d29d2e..f74eb5940fb2 100644
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -338,9 +338,17 @@ pub(crate) async fn handle_client(
         }
     };
 
+    let params_compat = match &user_info {
+        auth::Backend::ControlPlane(_, info) => {
+            info.info.options.get(NeonOptions::PARAMS_COMPAT).is_some()
+        }
+        auth::Backend::Local(_) => false,
+    };
+
     let mut node = connect_to_compute(
         ctx,
         &TcpMechanism {
+            params_compat,
             params: &params,
             locks: &config.connect_compute_locks,
         },
@@ -384,11 +392,13 @@ pub(crate) async fn prepare_client_connection
<P>
( // The new token (cancel_key_data) will be sent to the client. let cancel_key_data = session.enable_query_cancellation(node.cancel_closure.clone()); + // Forward all deferred notices to the client. + for notice in &node.delayed_notice { + stream.write_message_noflush(&Be::Raw(b'N', notice.as_bytes()))?; + } + // Forward all postgres connection params to the client. - // Right now the implementation is very hacky and inefficent (ideally, - // we don't need an intermediate hashmap), but at least it should be correct. for (name, value) in &node.params { - // TODO: Theoretically, this could result in a big pile of params... stream.write_message_noflush(&Be::ParameterStatus { name: name.as_bytes(), value: value.as_bytes(), @@ -407,19 +417,47 @@ pub(crate) async fn prepare_client_connection
<P>
(
 pub(crate) struct NeonOptions(Vec<(SmolStr, SmolStr)>);
 
 impl NeonOptions {
+    // proxy options:
+
+    /// `PARAMS_COMPAT` allows opting in to forwarding all startup parameters from client to compute.
+    const PARAMS_COMPAT: &str = "proxy_params_compat";
+
+    // cplane options:
+
+    /// `LSN` allows provisioning an ephemeral compute with time-travel to the provided LSN.
+    const LSN: &str = "lsn";
+
+    /// `ENDPOINT_TYPE` allows configuring an ephemeral compute to be read_only or read_write.
+    const ENDPOINT_TYPE: &str = "endpoint_type";
+
     pub(crate) fn parse_params(params: &StartupMessageParams) -> Self {
         params
             .options_raw()
             .map(Self::parse_from_iter)
             .unwrap_or_default()
     }
+
     pub(crate) fn parse_options_raw(options: &str) -> Self {
         Self::parse_from_iter(StartupMessageParams::parse_options_raw(options))
     }
+
+    pub(crate) fn get(&self, key: &str) -> Option<SmolStr> {
+        self.0
+            .iter()
+            .find_map(|(k, v)| (k == key).then_some(v))
+            .cloned()
+    }
+
     pub(crate) fn is_ephemeral(&self) -> bool {
-        // Currently, neon endpoint options are all reserved for ephemeral endpoints.
-        !self.0.is_empty()
+        self.0.iter().any(|(k, _)| match &**k {
+            // This is not a cplane option, we know it does not create ephemeral computes.
+            Self::PARAMS_COMPAT => false,
+            Self::LSN => true,
+            Self::ENDPOINT_TYPE => true,
+            // err on the side of caution. any cplane options we don't know about
+            // might lead to ephemeral computes.
+            _ => true,
+        })
     }
 
     fn parse_from_iter<'a>(options: impl Iterator<Item = &'a str>) -> Self {
diff --git a/proxy/src/proxy/retry.rs b/proxy/src/proxy/retry.rs
index d3f0c3e7d471..42d1491782dd 100644
--- a/proxy/src/proxy/retry.rs
+++ b/proxy/src/proxy/retry.rs
@@ -31,9 +31,9 @@ impl CouldRetry for io::Error {
     }
 }
 
-impl CouldRetry for tokio_postgres::error::DbError {
+impl CouldRetry for postgres_client::error::DbError {
     fn could_retry(&self) -> bool {
-        use tokio_postgres::error::SqlState;
+        use postgres_client::error::SqlState;
         matches!(
             self.code(),
             &SqlState::CONNECTION_FAILURE
@@ -43,9 +43,9 @@ impl CouldRetry for tokio_postgres::error::DbError {
         )
     }
 }
-impl ShouldRetryWakeCompute for tokio_postgres::error::DbError {
+impl ShouldRetryWakeCompute for postgres_client::error::DbError {
     fn should_retry_wake_compute(&self) -> bool {
-        use tokio_postgres::error::SqlState;
+        use postgres_client::error::SqlState;
         // Here are errors that happens after the user successfully authenticated to the database.
         // TODO: there are pgbouncer errors that should be retried, but they are not listed here.
         !matches!(
@@ -61,21 +61,21 @@ impl ShouldRetryWakeCompute for tokio_postgres::error::DbError {
     }
 }
 
-impl CouldRetry for tokio_postgres::Error {
+impl CouldRetry for postgres_client::Error {
     fn could_retry(&self) -> bool {
         if let Some(io_err) = self.source().and_then(|x| x.downcast_ref()) {
             io::Error::could_retry(io_err)
         } else if let Some(db_err) = self.source().and_then(|x| x.downcast_ref()) {
-            tokio_postgres::error::DbError::could_retry(db_err)
+            postgres_client::error::DbError::could_retry(db_err)
         } else {
             false
         }
     }
 }
-impl ShouldRetryWakeCompute for tokio_postgres::Error {
+impl ShouldRetryWakeCompute for postgres_client::Error {
     fn should_retry_wake_compute(&self) -> bool {
         if let Some(db_err) = self.source().and_then(|x| x.downcast_ref()) {
-            tokio_postgres::error::DbError::should_retry_wake_compute(db_err)
+            postgres_client::error::DbError::should_retry_wake_compute(db_err)
         } else {
             // likely an IO error. Possible the compute has shutdown and the
             // cache is stale.
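Aside: the two `source()`-walking impls above are the crux of this part of the migration. `postgres_client::Error` (like `tokio_postgres::Error` before it) is opaque, so retry classification has to walk the cause chain and downcast. A minimal self-contained sketch of the same pattern, outside the proxy codebase (the trait name mirrors the diff; the set of retryable `io::ErrorKind`s here is illustrative, not the crate's exact list):

    use std::error::Error as StdError;
    use std::io;

    // Mirrors the `CouldRetry` trait used in proxy/src/proxy/retry.rs.
    trait CouldRetry {
        fn could_retry(&self) -> bool;
    }

    impl CouldRetry for io::Error {
        fn could_retry(&self) -> bool {
            // A plausible (not exhaustive) set of transient network failures.
            matches!(
                self.kind(),
                io::ErrorKind::ConnectionRefused
                    | io::ErrorKind::ConnectionAborted
                    | io::ErrorKind::ConnectionReset
            )
        }
    }

    // A wrapper error only exposes its cause via source(), so classification
    // downcasts the cause, exactly like the impls in the hunk above.
    fn is_retryable(err: &(dyn StdError + 'static)) -> bool {
        match err.source().and_then(|s| s.downcast_ref::<io::Error>()) {
            Some(io_err) => io_err.could_retry(),
            None => false, // unknown cause: err on the side of not retrying
        }
    }
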
diff --git a/proxy/src/proxy/tests/mitm.rs b/proxy/src/proxy/tests/mitm.rs index fe211adfeb7b..59c9ac27b838 100644 --- a/proxy/src/proxy/tests/mitm.rs +++ b/proxy/src/proxy/tests/mitm.rs @@ -8,9 +8,9 @@ use std::fmt::Debug; use bytes::{Bytes, BytesMut}; use futures::{SinkExt, StreamExt}; +use postgres_client::tls::TlsConnect; use postgres_protocol::message::frontend; use tokio::io::{AsyncReadExt, DuplexStream}; -use tokio_postgres::tls::TlsConnect; use tokio_util::codec::{Decoder, Encoder}; use super::*; @@ -55,7 +55,13 @@ async fn proxy_mitm( // give the end_server the startup parameters let mut buf = BytesMut::new(); - frontend::startup_message(startup.iter(), &mut buf).unwrap(); + frontend::startup_message( + &postgres_protocol::message::frontend::StartupMessageParams { + params: startup.params.into(), + }, + &mut buf, + ) + .unwrap(); end_server.send(buf.freeze()).await.unwrap(); // proxy messages between end_client and end_server @@ -158,8 +164,8 @@ async fn scram_auth_disable_channel_binding() -> anyhow::Result<()> { Scram::new("password").await?, )); - let _client_err = tokio_postgres::Config::new() - .channel_binding(tokio_postgres::config::ChannelBinding::Disable) + let _client_err = postgres_client::Config::new("test".to_owned(), 5432) + .channel_binding(postgres_client::config::ChannelBinding::Disable) .user("user") .dbname("db") .password("password") @@ -175,7 +181,7 @@ async fn scram_auth_disable_channel_binding() -> anyhow::Result<()> { async fn scram_auth_prefer_channel_binding() -> anyhow::Result<()> { connect_failure( Intercept::None, - tokio_postgres::config::ChannelBinding::Prefer, + postgres_client::config::ChannelBinding::Prefer, ) .await } @@ -185,7 +191,7 @@ async fn scram_auth_prefer_channel_binding() -> anyhow::Result<()> { async fn scram_auth_prefer_channel_binding_intercept() -> anyhow::Result<()> { connect_failure( Intercept::Methods, - tokio_postgres::config::ChannelBinding::Prefer, + postgres_client::config::ChannelBinding::Prefer, ) .await } @@ -195,7 +201,7 @@ async fn scram_auth_prefer_channel_binding_intercept() -> anyhow::Result<()> { async fn scram_auth_prefer_channel_binding_intercept_response() -> anyhow::Result<()> { connect_failure( Intercept::SASLResponse, - tokio_postgres::config::ChannelBinding::Prefer, + postgres_client::config::ChannelBinding::Prefer, ) .await } @@ -205,7 +211,7 @@ async fn scram_auth_prefer_channel_binding_intercept_response() -> anyhow::Resul async fn scram_auth_require_channel_binding() -> anyhow::Result<()> { connect_failure( Intercept::None, - tokio_postgres::config::ChannelBinding::Require, + postgres_client::config::ChannelBinding::Require, ) .await } @@ -215,7 +221,7 @@ async fn scram_auth_require_channel_binding() -> anyhow::Result<()> { async fn scram_auth_require_channel_binding_intercept() -> anyhow::Result<()> { connect_failure( Intercept::Methods, - tokio_postgres::config::ChannelBinding::Require, + postgres_client::config::ChannelBinding::Require, ) .await } @@ -225,14 +231,14 @@ async fn scram_auth_require_channel_binding_intercept() -> anyhow::Result<()> { async fn scram_auth_require_channel_binding_intercept_response() -> anyhow::Result<()> { connect_failure( Intercept::SASLResponse, - tokio_postgres::config::ChannelBinding::Require, + postgres_client::config::ChannelBinding::Require, ) .await } async fn connect_failure( intercept: Intercept, - channel_binding: tokio_postgres::config::ChannelBinding, + channel_binding: postgres_client::config::ChannelBinding, ) -> anyhow::Result<()> { let (server, client, 
client_config, server_config) = proxy_mitm(intercept).await; let proxy = tokio::spawn(dummy_proxy( @@ -241,7 +247,7 @@ async fn connect_failure( Scram::new("password").await?, )); - let _client_err = tokio_postgres::Config::new() + let _client_err = postgres_client::Config::new("test".to_owned(), 5432) .channel_binding(channel_binding) .user("user") .dbname("db") diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index 2c2c2964b6bb..911b349416f2 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -7,13 +7,13 @@ use std::time::Duration; use anyhow::{bail, Context}; use async_trait::async_trait; use http::StatusCode; +use postgres_client::config::SslMode; +use postgres_client::tls::{MakeTlsConnect, NoTls}; use retry::{retry_after, ShouldRetryWakeCompute}; use rstest::rstest; use rustls::crypto::ring; use rustls::pki_types; use tokio::io::DuplexStream; -use tokio_postgres::config::SslMode; -use tokio_postgres::tls::{MakeTlsConnect, NoTls}; use super::connect_compute::ConnectMechanism; use super::retry::CouldRetry; @@ -204,7 +204,7 @@ async fn handshake_tls_is_enforced_by_proxy() -> anyhow::Result<()> { let (_, server_config) = generate_tls_config("generic-project-name.localhost", "localhost")?; let proxy = tokio::spawn(dummy_proxy(client, Some(server_config), NoAuth)); - let client_err = tokio_postgres::Config::new() + let client_err = postgres_client::Config::new("test".to_owned(), 5432) .user("john_doe") .dbname("earth") .ssl_mode(SslMode::Disable) @@ -233,7 +233,7 @@ async fn handshake_tls() -> anyhow::Result<()> { generate_tls_config("generic-project-name.localhost", "localhost")?; let proxy = tokio::spawn(dummy_proxy(client, Some(server_config), NoAuth)); - let (_client, _conn) = tokio_postgres::Config::new() + let _conn = postgres_client::Config::new("test".to_owned(), 5432) .user("john_doe") .dbname("earth") .ssl_mode(SslMode::Require) @@ -249,10 +249,10 @@ async fn handshake_raw() -> anyhow::Result<()> { let proxy = tokio::spawn(dummy_proxy(client, None, NoAuth)); - let (_client, _conn) = tokio_postgres::Config::new() + let _conn = postgres_client::Config::new("test".to_owned(), 5432) .user("john_doe") .dbname("earth") - .options("project=generic-project-name") + .set_param("options", "project=generic-project-name") .ssl_mode(SslMode::Prefer) .connect_raw(server, NoTls) .await?; @@ -296,8 +296,8 @@ async fn scram_auth_good(#[case] password: &str) -> anyhow::Result<()> { Scram::new(password).await?, )); - let (_client, _conn) = tokio_postgres::Config::new() - .channel_binding(tokio_postgres::config::ChannelBinding::Require) + let _conn = postgres_client::Config::new("test".to_owned(), 5432) + .channel_binding(postgres_client::config::ChannelBinding::Require) .user("user") .dbname("db") .password(password) @@ -320,8 +320,8 @@ async fn scram_auth_disable_channel_binding() -> anyhow::Result<()> { Scram::new("password").await?, )); - let (_client, _conn) = tokio_postgres::Config::new() - .channel_binding(tokio_postgres::config::ChannelBinding::Disable) + let _conn = postgres_client::Config::new("test".to_owned(), 5432) + .channel_binding(postgres_client::config::ChannelBinding::Disable) .user("user") .dbname("db") .password("password") @@ -348,7 +348,7 @@ async fn scram_auth_mock() -> anyhow::Result<()> { .map(char::from) .collect(); - let _client_err = tokio_postgres::Config::new() + let _client_err = postgres_client::Config::new("test".to_owned(), 5432) .user("user") .dbname("db") .password(&password) // no password will match the mocked 
secret @@ -546,7 +546,7 @@ impl TestControlPlaneClient for TestConnectMechanism { fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeInfo { let node = NodeInfo { - config: compute::ConnCfg::new(), + config: compute::ConnCfg::new("test".to_owned(), 5432), aux: MetricsAuxInfo { endpoint_id: (&EndpointId::from("endpoint")).into(), project_id: (&ProjectId::from("project")).into(), diff --git a/proxy/src/redis/elasticache.rs b/proxy/src/redis/elasticache.rs index d118c8f4128c..bf6dde933285 100644 --- a/proxy/src/redis/elasticache.rs +++ b/proxy/src/redis/elasticache.rs @@ -1,6 +1,14 @@ +use std::sync::Arc; use std::time::{Duration, SystemTime}; +use aws_config::environment::EnvironmentVariableCredentialsProvider; +use aws_config::imds::credentials::ImdsCredentialsProvider; use aws_config::meta::credentials::CredentialsProviderChain; +use aws_config::meta::region::RegionProviderChain; +use aws_config::profile::ProfileFileCredentialsProvider; +use aws_config::provider_config::ProviderConfig; +use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider; +use aws_config::Region; use aws_sdk_iam::config::ProvideCredentials; use aws_sigv4::http_request::{ self, SignableBody, SignableRequest, SignatureLocation, SigningSettings, @@ -45,12 +53,45 @@ pub struct CredentialsProvider { } impl CredentialsProvider { - pub fn new(config: AWSIRSAConfig, credentials_provider: CredentialsProviderChain) -> Self { - CredentialsProvider { - config, - credentials_provider, - } + pub async fn new( + aws_region: String, + redis_cluster_name: Option, + redis_user_id: Option, + ) -> Arc { + let region_provider = + RegionProviderChain::default_provider().or_else(Region::new(aws_region.clone())); + let provider_conf = + ProviderConfig::without_region().with_region(region_provider.region().await); + let aws_credentials_provider = { + // uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY" + CredentialsProviderChain::first_try( + "env", + EnvironmentVariableCredentialsProvider::new(), + ) + // uses "AWS_PROFILE" / `aws sso login --profile ` + .or_else( + "profile-sso", + ProfileFileCredentialsProvider::builder() + .configure(&provider_conf) + .build(), + ) + // uses "AWS_WEB_IDENTITY_TOKEN_FILE", "AWS_ROLE_ARN", "AWS_ROLE_SESSION_NAME" + // needed to access remote extensions bucket + .or_else( + "token", + WebIdentityTokenCredentialsProvider::builder() + .configure(&provider_conf) + .build(), + ) + // uses imds v2 + .or_else("imds", ImdsCredentialsProvider::builder().build()) + }; + Arc::new(CredentialsProvider { + config: AWSIRSAConfig::new(aws_region, redis_cluster_name, redis_user_id), + credentials_provider: aws_credentials_provider, + }) } + pub(crate) async fn provide_credentials(&self) -> anyhow::Result<(String, String)> { let aws_credentials = self .credentials_provider diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 75909f3358d2..251aa470843d 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -37,9 +37,9 @@ use crate::types::{EndpointId, Host, LOCAL_PROXY_SUFFIX}; pub(crate) struct PoolingBackend { pub(crate) http_conn_pool: Arc>>, - pub(crate) local_pool: Arc>, + pub(crate) local_pool: Arc>, pub(crate) pool: - Arc>>, + Arc>>, pub(crate) config: &'static ProxyConfig, pub(crate) auth_backend: &'static crate::auth::Backend<'static, ()>, @@ -53,6 +53,8 @@ impl PoolingBackend { user_info: &ComputeUserInfo, password: &[u8], ) -> Result { + ctx.set_auth_method(crate::context::AuthMethod::Cleartext); + let user_info 
= user_info.clone(); let backend = self.auth_backend.as_ref().map(|()| user_info.clone()); let (allowed_ips, maybe_secret) = backend.get_allowed_ips_and_secret(ctx).await?; @@ -115,6 +117,8 @@ impl PoolingBackend { user_info: &ComputeUserInfo, jwt: String, ) -> Result { + ctx.set_auth_method(crate::context::AuthMethod::Jwt); + match &self.auth_backend { crate::auth::Backend::ControlPlane(console, ()) => { self.config @@ -166,7 +170,7 @@ impl PoolingBackend { conn_info: ConnInfo, keys: ComputeCredentials, force_new: bool, - ) -> Result, HttpConnError> { + ) -> Result, HttpConnError> { let maybe_client = if force_new { debug!("pool: pool is disabled"); None @@ -252,7 +256,7 @@ impl PoolingBackend { &self, ctx: &RequestContext, conn_info: ConnInfo, - ) -> Result, HttpConnError> { + ) -> Result, HttpConnError> { if let Some(client) = self.local_pool.get(ctx, &conn_info)? { return Ok(client); } @@ -305,13 +309,16 @@ impl PoolingBackend { .config .user(&conn_info.user_info.user) .dbname(&conn_info.dbname) - .options(&format!( - "-c pg_session_jwt.jwk={}", - serde_json::to_string(&jwk).expect("serializing jwk to json should not fail") - )); + .set_param( + "options", + &format!( + "-c pg_session_jwt.jwk={}", + serde_json::to_string(&jwk).expect("serializing jwk to json should not fail") + ), + ); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); - let (client, connection) = config.connect(tokio_postgres::NoTls).await?; + let (client, connection) = config.connect(postgres_client::NoTls).await?; drop(pause); let pid = client.get_process_id(); @@ -356,7 +363,7 @@ pub(crate) enum HttpConnError { #[error("pooled connection closed at inconsistent state")] ConnectionClosedAbruptly(#[from] tokio::sync::watch::error::SendError), #[error("could not connection to postgres in compute")] - PostgresConnectionError(#[from] tokio_postgres::Error), + PostgresConnectionError(#[from] postgres_client::Error), #[error("could not connection to local-proxy in compute")] LocalProxyConnectionError(#[from] LocalProxyConnError), #[error("could not parse JWT payload")] @@ -475,7 +482,7 @@ impl ShouldRetryWakeCompute for LocalProxyConnError { } struct TokioMechanism { - pool: Arc>>, + pool: Arc>>, conn_info: ConnInfo, conn_id: uuid::Uuid, @@ -485,7 +492,7 @@ struct TokioMechanism { #[async_trait] impl ConnectMechanism for TokioMechanism { - type Connection = Client; + type Connection = Client; type ConnectError = HttpConnError; type Error = HttpConnError; @@ -495,7 +502,7 @@ impl ConnectMechanism for TokioMechanism { node_info: &CachedNodeInfo, timeout: Duration, ) -> Result { - let host = node_info.config.get_host()?; + let host = node_info.config.get_host(); let permit = self.locks.get_permit(&host).await?; let mut config = (*node_info.config).clone(); @@ -505,7 +512,7 @@ impl ConnectMechanism for TokioMechanism { .connect_timeout(timeout); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); - let res = config.connect(tokio_postgres::NoTls).await; + let res = config.connect(postgres_client::NoTls).await; drop(pause); let (client, connection) = permit.release_result(res)?; @@ -545,16 +552,12 @@ impl ConnectMechanism for HyperMechanism { node_info: &CachedNodeInfo, timeout: Duration, ) -> Result { - let host = node_info.config.get_host()?; + let host = node_info.config.get_host(); let permit = self.locks.get_permit(&host).await?; let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); - let port = *node_info.config.get_ports().first().ok_or_else(|| { - 
HttpConnError::WakeCompute(WakeComputeError::BadComputeAddress( - "local-proxy port missing on compute address".into(), - )) - })?; + let port = node_info.config.get_port(); let res = connect_http2(&host, port, timeout).await; drop(pause); let (client, connection) = permit.release_result(res)?; diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs index c302eac5684b..cac5a173cb16 100644 --- a/proxy/src/serverless/conn_pool.rs +++ b/proxy/src/serverless/conn_pool.rs @@ -5,11 +5,11 @@ use std::task::{ready, Poll}; use futures::future::poll_fn; use futures::Future; +use postgres_client::tls::NoTlsStream; +use postgres_client::AsyncMessage; use smallvec::SmallVec; use tokio::net::TcpStream; use tokio::time::Instant; -use tokio_postgres::tls::NoTlsStream; -use tokio_postgres::AsyncMessage; use tokio_util::sync::CancellationToken; use tracing::{error, info, info_span, warn, Instrument}; #[cfg(test)] @@ -58,7 +58,7 @@ pub(crate) fn poll_client( ctx: &RequestContext, conn_info: ConnInfo, client: C, - mut connection: tokio_postgres::Connection, + mut connection: postgres_client::Connection, conn_id: uuid::Uuid, aux: MetricsAuxInfo, ) -> Client { diff --git a/proxy/src/serverless/conn_pool_lib.rs b/proxy/src/serverless/conn_pool_lib.rs index fe1d2563bca1..2a46c8f9c5cf 100644 --- a/proxy/src/serverless/conn_pool_lib.rs +++ b/proxy/src/serverless/conn_pool_lib.rs @@ -7,8 +7,8 @@ use std::time::Duration; use dashmap::DashMap; use parking_lot::RwLock; +use postgres_client::ReadyForQueryStatus; use rand::Rng; -use tokio_postgres::ReadyForQueryStatus; use tracing::{debug, info, Span}; use super::backend::HttpConnError; @@ -683,7 +683,7 @@ pub(crate) trait ClientInnerExt: Sync + Send + 'static { fn get_process_id(&self) -> i32; } -impl ClientInnerExt for tokio_postgres::Client { +impl ClientInnerExt for postgres_client::Client { fn is_closed(&self) -> bool { self.is_closed() } diff --git a/proxy/src/serverless/json.rs b/proxy/src/serverless/json.rs index 569e2da5715a..25b25c66d3fb 100644 --- a/proxy/src/serverless/json.rs +++ b/proxy/src/serverless/json.rs @@ -1,6 +1,6 @@ +use postgres_client::types::{Kind, Type}; +use postgres_client::Row; use serde_json::{Map, Value}; -use tokio_postgres::types::{Kind, Type}; -use tokio_postgres::Row; // // Convert json non-string types to strings, so that they can be passed to Postgres @@ -61,7 +61,7 @@ fn json_array_to_pg_array(value: &Value) -> Option { #[derive(Debug, thiserror::Error)] pub(crate) enum JsonConversionError { #[error("internal error compute returned invalid data: {0}")] - AsTextError(tokio_postgres::Error), + AsTextError(postgres_client::Error), #[error("parse int error: {0}")] ParseIntError(#[from] std::num::ParseIntError), #[error("parse float error: {0}")] diff --git a/proxy/src/serverless/local_conn_pool.rs b/proxy/src/serverless/local_conn_pool.rs index db9ac49dae8f..b84cde9e252a 100644 --- a/proxy/src/serverless/local_conn_pool.rs +++ b/proxy/src/serverless/local_conn_pool.rs @@ -22,13 +22,13 @@ use indexmap::IndexMap; use jose_jwk::jose_b64::base64ct::{Base64UrlUnpadded, Encoding}; use p256::ecdsa::{Signature, SigningKey}; use parking_lot::RwLock; +use postgres_client::tls::NoTlsStream; +use postgres_client::types::ToSql; +use postgres_client::AsyncMessage; use serde_json::value::RawValue; use signature::Signer; use tokio::net::TcpStream; use tokio::time::Instant; -use tokio_postgres::tls::NoTlsStream; -use tokio_postgres::types::ToSql; -use tokio_postgres::AsyncMessage; use tokio_util::sync::CancellationToken; use 
tracing::{debug, error, info, info_span, warn, Instrument}; @@ -164,7 +164,7 @@ pub(crate) fn poll_client( ctx: &RequestContext, conn_info: ConnInfo, client: C, - mut connection: tokio_postgres::Connection, + mut connection: postgres_client::Connection, key: SigningKey, conn_id: uuid::Uuid, aux: MetricsAuxInfo, @@ -280,7 +280,7 @@ pub(crate) fn poll_client( ) } -impl ClientInnerCommon { +impl ClientInnerCommon { pub(crate) async fn set_jwt_session(&mut self, payload: &[u8]) -> Result<(), HttpConnError> { if let ClientDataEnum::Local(local_data) = &mut self.data { local_data.jti += 1; diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index afd93d02f047..5e85f5ec4019 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -11,12 +11,12 @@ use http_body_util::{BodyExt, Full}; use hyper::body::Incoming; use hyper::http::{HeaderName, HeaderValue}; use hyper::{header, HeaderMap, Request, Response, StatusCode}; +use postgres_client::error::{DbError, ErrorPosition, SqlState}; +use postgres_client::{GenericClient, IsolationLevel, NoTls, ReadyForQueryStatus, Transaction}; use pq_proto::StartupMessageParamsBuilder; use serde::Serialize; use serde_json::Value; use tokio::time::{self, Instant}; -use tokio_postgres::error::{DbError, ErrorPosition, SqlState}; -use tokio_postgres::{GenericClient, IsolationLevel, NoTls, ReadyForQueryStatus, Transaction}; use tokio_util::sync::CancellationToken; use tracing::{debug, error, info}; use typed_json::json; @@ -139,9 +139,6 @@ fn get_conn_info( headers: &HeaderMap, tls: Option<&TlsConfig>, ) -> Result { - // HTTP only uses cleartext (for now and likely always) - ctx.set_auth_method(crate::context::AuthMethod::Cleartext); - let connection_string = headers .get(&CONN_STRING) .ok_or(ConnInfoError::InvalidHeader(&CONN_STRING))? @@ -364,7 +361,7 @@ pub(crate) enum SqlOverHttpError { #[error("invalid isolation level")] InvalidIsolationLevel, #[error("{0}")] - Postgres(#[from] tokio_postgres::Error), + Postgres(#[from] postgres_client::Error), #[error("{0}")] JsonConversion(#[from] JsonConversionError), #[error("{0}")] @@ -989,7 +986,7 @@ async fn query_to_json( // Manually drain the stream into a vector to leave row_stream hanging // around to get a command tag. Also check that the response is not too // big. 
- let mut rows: Vec = Vec::new(); + let mut rows: Vec = Vec::new(); while let Some(row) = row_stream.next().await { let row = row?; *current_size += row.body_len(); @@ -1066,13 +1063,13 @@ async fn query_to_json( } enum Client { - Remote(conn_pool_lib::Client), - Local(conn_pool_lib::Client), + Remote(conn_pool_lib::Client), + Local(conn_pool_lib::Client), } enum Discard<'a> { - Remote(conn_pool_lib::Discard<'a, tokio_postgres::Client>), - Local(conn_pool_lib::Discard<'a, tokio_postgres::Client>), + Remote(conn_pool_lib::Discard<'a, postgres_client::Client>), + Local(conn_pool_lib::Discard<'a, postgres_client::Client>), } impl Client { @@ -1083,7 +1080,7 @@ impl Client { } } - fn inner(&mut self) -> (&mut tokio_postgres::Client, Discard<'_>) { + fn inner(&mut self) -> (&mut postgres_client::Client, Discard<'_>) { match self { Client::Remote(client) => { let (c, d) = client.inner(); diff --git a/safekeeper/benches/receive_wal.rs b/safekeeper/benches/receive_wal.rs index 8c4281cf527e..313d945b942f 100644 --- a/safekeeper/benches/receive_wal.rs +++ b/safekeeper/benches/receive_wal.rs @@ -24,9 +24,15 @@ const KB: usize = 1024; const MB: usize = 1024 * KB; const GB: usize = 1024 * MB; +/// Use jemalloc, and configure it to sample allocations for profiles every 1 MB. +/// This mirrors the configuration in bin/safekeeper.rs. #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +#[allow(non_upper_case_globals)] +#[export_name = "malloc_conf"] +pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; + // Register benchmarks with Criterion. criterion_group!( name = benches; diff --git a/safekeeper/spec/.gitignore b/safekeeper/spec/.gitignore new file mode 100644 index 000000000000..7233153039a6 --- /dev/null +++ b/safekeeper/spec/.gitignore @@ -0,0 +1,3 @@ +*TTrace* +*.toolbox/ +states/ diff --git a/safekeeper/spec/MCProposerAcceptorStatic.tla b/safekeeper/spec/MCProposerAcceptorStatic.tla new file mode 100644 index 000000000000..be3d99c6976d --- /dev/null +++ b/safekeeper/spec/MCProposerAcceptorStatic.tla @@ -0,0 +1,31 @@ +---- MODULE MCProposerAcceptorStatic ---- +EXTENDS TLC, ProposerAcceptorStatic + +\* Augments the spec with model checking constraints. + +\* For model checking. +CONSTANTS + max_entries, \* model constraint: max log entries acceptor/proposer can hold + max_term \* model constraint: max allowed term + +ASSUME max_entries \in Nat /\ max_term \in Nat + +\* Model space constraint. +StateConstraint == \A p \in proposers: + /\ prop_state[p].term <= max_term + /\ Len(prop_state[p].wal) <= max_entries +\* Sets of proposers and acceptors are symmetric because we don't take any +\* actions depending on some concrete proposer/acceptor (like IF p = p1 THEN +\* ...) +ProposerAcceptorSymmetry == Permutations(proposers) \union Permutations(acceptors) + +\* enforce order of the vars in the error trace with ALIAS +\* Note that ALIAS is supported only since version 1.8.0 which is pre-release +\* as of writing this. 
+Alias == [ + prop_state |-> prop_state, + acc_state |-> acc_state, + committed |-> committed + ] + +==== diff --git a/safekeeper/spec/ProposerAcceptorConsensus.cfg b/safekeeper/spec/ProposerAcceptorConsensus.cfg deleted file mode 100644 index 989c86e47d75..000000000000 --- a/safekeeper/spec/ProposerAcceptorConsensus.cfg +++ /dev/null @@ -1,34 +0,0 @@ -\* MV CONSTANT declarations -CONSTANT NULL = NULL -CONSTANTS -p1 = p1 -p2 = p2 -p3 = p3 -a1 = a1 -a2 = a2 -a3 = a3 -\* MV CONSTANT definitions -CONSTANT -proposers = {p1, p2} -acceptors = {a1, a2, a3} -\* SYMMETRY definition -SYMMETRY perms -\* CONSTANT definitions -CONSTANT -max_term = 3 -CONSTANT -max_entries = 3 -\* INIT definition -INIT -Init -\* NEXT definition -NEXT -Next -\* INVARIANT definition -INVARIANT -TypeOk -ElectionSafety -LogIsMonotonic -LogSafety -CommittedNotOverwritten -CHECK_DEADLOCK FALSE \ No newline at end of file diff --git a/safekeeper/spec/ProposerAcceptorConsensus.tla b/safekeeper/spec/ProposerAcceptorConsensus.tla deleted file mode 100644 index e5f0bb270f08..000000000000 --- a/safekeeper/spec/ProposerAcceptorConsensus.tla +++ /dev/null @@ -1,363 +0,0 @@ ----- MODULE ProposerAcceptorConsensus ---- - -\* Differences from current implementation: -\* - unified not-globally-unique epoch & term (node_id) -\* Simplifications: -\* - instant message delivery -\* - feedback is not modeled separately, commit_lsn is updated directly - -EXTENDS Integers, Sequences, FiniteSets, TLC - -VARIABLES - prop_state, \* prop_state[p] is state of proposer p - acc_state, \* acc_state[a] is state of acceptor a - commit_lsns \* map of acceptor -> commit_lsn - -CONSTANT - acceptors, - proposers, - max_entries, \* model constraint: max log entries acceptor/proposer can hold - max_term \* model constraint: max allowed term - -CONSTANT NULL - -ASSUME max_entries \in Nat /\ max_term \in Nat - -\* For specifying symmetry set in manual cfg file, see -\* https://github.com/tlaplus/tlaplus/issues/404 -perms == Permutations(proposers) \union Permutations(acceptors) - -\******************************************************************************** -\* Helpers -\******************************************************************************** - -Maximum(S) == - (*************************************************************************) - (* If S is a set of numbers, then this define Maximum(S) to be the *) - (* maximum of those numbers, or -1 if S is empty. *) - (*************************************************************************) - IF S = {} THEN -1 - ELSE CHOOSE n \in S : \A m \in S : n \geq m - -\* minimum of numbers in the set, error if set is empty -Minimum(S) == - CHOOSE min \in S : \A n \in S : min <= n - -\* Min of two numbers -Min(a, b) == IF a < b THEN a ELSE b - -\* Set of values of function f. XXX is there a such builtin? -FValues(f) == {f[a] : a \in DOMAIN f} - -\* Sort of 0 for functions -EmptyF == [x \in {} |-> 42] -IsEmptyF(f) == DOMAIN f = {} - -\* Next entry proposer p will push to acceptor a or NULL. -NextEntry(p, a) == - IF Len(prop_state[p].wal) >= prop_state[p].next_send_lsn[a] THEN - CHOOSE r \in FValues(prop_state[p].wal) : r.lsn = prop_state[p].next_send_lsn[a] - ELSE - NULL - - -\***************** - -NumAccs == Cardinality(acceptors) - -\* does acc_set form the quorum? -Quorum(acc_set) == Cardinality(acc_set) >= (NumAccs \div 2 + 1) -\* all quorums of acceptors -Quorums == {subset \in SUBSET acceptors: Quorum(subset)} - -\* flush_lsn of acceptor a. 
-FlushLsn(a) == Len(acc_state[a].wal) - - -\******************************************************************************** -\* Type assertion -\******************************************************************************** -\* Defining sets of all possible tuples and using them in TypeOk in usual -\* all-tuples constructor is not practical because such definitions force -\* TLC to enumerate them, while they are are horribly enormous -\* (TLC screams "Attempted to construct a set with too many elements"). -\* So instead check types manually. -TypeOk == - /\ \A p \in proposers: - /\ DOMAIN prop_state[p] = {"state", "term", "votes", "donor_epoch", "vcl", "wal", "next_send_lsn"} - \* in campaign proposer sends RequestVote and waits for acks; - \* in leader he is elected - /\ prop_state[p].state \in {"campaign", "leader"} - \* 0..max_term should be actually Nat in the unbounded model, but TLC won't - \* swallow it - /\ prop_state[p].term \in 0..max_term - \* votes received - /\ \A voter \in DOMAIN prop_state[p].votes: - /\ voter \in acceptors - /\ prop_state[p].votes[voter] \in [epoch: 0..max_term, flush_lsn: 0..max_entries] - /\ prop_state[p].donor_epoch \in 0..max_term - \* wal is sequence of just records - /\ \A i \in DOMAIN prop_state[p].wal: - prop_state[p].wal[i] \in [lsn: 1..max_entries, epoch: 1..max_term] - \* Following implementation, we skew the original Aurora meaning of this; - \* here it is lsn of highest definitely committed record as set by proposer - \* when it is elected; it doesn't change since then - /\ prop_state[p].vcl \in 0..max_entries - \* map of acceptor -> next lsn to send - /\ \A a \in DOMAIN prop_state[p].next_send_lsn: - /\ a \in acceptors - /\ prop_state[p].next_send_lsn[a] \in 1..(max_entries + 1) - /\ \A a \in acceptors: - /\ DOMAIN acc_state[a] = {"term", "epoch", "wal"} - /\ acc_state[a].term \in 0..max_term - /\ acc_state[a].epoch \in 0..max_term - /\ \A i \in DOMAIN acc_state[a].wal: - acc_state[a].wal[i] \in [lsn: 1..max_entries, epoch: 1..max_term] - /\ \A a \in DOMAIN commit_lsns: - /\ a \in acceptors - /\ commit_lsns[a] \in 0..max_entries - -\******************************************************************************** -\* Initial -\******************************************************************************** - -Init == - /\ prop_state = [p \in proposers |-> [ - state |-> "campaign", - term |-> 1, - votes |-> EmptyF, - donor_epoch |-> 0, - vcl |-> 0, - wal |-> << >>, - next_send_lsn |-> EmptyF - ]] - /\ acc_state = [a \in acceptors |-> [ - \* there will be no leader in this term, 1 is the first real - term |-> 0, - epoch |-> 0, - wal |-> << >> - ]] - /\ commit_lsns = [a \in acceptors |-> 0] - - -\******************************************************************************** -\* Actions -\******************************************************************************** - -\* Proposer loses all state. -\* For simplicity (and to reduct state space), we assume it immediately gets -\* current state from quorum q of acceptors determining the term he will request -\* to vote for. -RestartProposer(p, q) == - /\ Quorum(q) - /\ LET - new_term == Maximum({acc_state[a].term : a \in q}) + 1 - IN - /\ new_term <= max_term - /\ prop_state' = [prop_state EXCEPT ![p].state = "campaign", - ![p].term = new_term, - ![p].votes = EmptyF, - ![p].donor_epoch = 0, - ![p].vcl = 0, - ![p].wal = << >>, - ![p].next_send_lsn = EmptyF] - /\ UNCHANGED <> - -\* Acceptor a immediately votes for proposer p. 
-Vote(p, a) == - /\ prop_state[p].state = "campaign" - /\ acc_state[a].term < prop_state[p].term \* main voting condition - /\ acc_state' = [acc_state EXCEPT ![a].term = prop_state[p].term] - /\ LET - vote == [epoch |-> acc_state[a].epoch, flush_lsn |-> FlushLsn(a)] - IN - prop_state' = [prop_state EXCEPT ![p].votes = prop_state[p].votes @@ (a :> vote)] - /\ UNCHANGED <> - - -\* Proposer p gets elected. -BecomeLeader(p) == - /\ prop_state[p].state = "campaign" - /\ Quorum(DOMAIN prop_state[p].votes) - /\ LET - max_epoch == Maximum({v.epoch : v \in FValues(prop_state[p].votes)}) - max_epoch_votes == {v \in FValues(prop_state[p].votes) : v.epoch = max_epoch} - donor == CHOOSE dv \in DOMAIN prop_state[p].votes : - /\ prop_state[p].votes[dv].epoch = max_epoch - /\ \A v \in max_epoch_votes: - prop_state[p].votes[dv].flush_lsn >= v.flush_lsn - max_vote == prop_state[p].votes[donor] - \* Establish lsn to stream from for voters. - \* At some point it seemed like we can regard log as correct and only - \* append to it if has in the max_epoch, however TLC showed that's not - \* the case; we must always stream since first not matching record. - next_send_lsn == [voter \in DOMAIN prop_state[p].votes |-> 1] - IN - \* we fetch log from the most advanced node (this is separate - \* roundtrip), make sure node is still on one term with us - /\ acc_state[donor].term = prop_state[p].term - /\ prop_state' = [prop_state EXCEPT ![p].state = "leader", - \* fetch the log from donor - ![p].wal = acc_state[donor].wal, - ![p].donor_epoch = max_epoch, - ![p].vcl = max_vote.flush_lsn, - ![p].next_send_lsn = next_send_lsn] - /\ UNCHANGED <> - - -\* acceptor a learns about elected proposer p's term. -UpdateTerm(p, a) == - /\ prop_state[p].state = "leader" - /\ acc_state[a].term < prop_state[p].term - /\ acc_state' = [acc_state EXCEPT ![a].term = prop_state[p].term] - /\ UNCHANGED <> - - -\* Acceptor a which didn't participate in voting connects to elected proposer p -\* and p sets the streaming point -HandshakeWithLeader(p, a) == - /\ prop_state[p].state = "leader" - /\ acc_state[a].term = prop_state[p].term - /\ a \notin DOMAIN prop_state[p].next_send_lsn - /\ LET - next_send_lsn == prop_state[p].next_send_lsn @@ (a :> 1) - IN - prop_state' = [prop_state EXCEPT ![p].next_send_lsn = next_send_lsn] - /\ UNCHANGED <> - - -\* Append new log entry to elected proposer -NewEntry(p) == - /\ prop_state[p].state = "leader" - /\ Len(prop_state[p].wal) < max_entries \* model constraint - /\ LET - new_lsn == IF Len(prop_state[p].wal) = 0 THEN - prop_state[p].vcl + 1 - ELSE - \* lsn of last record + 1 - prop_state[p].wal[Len(prop_state[p].wal)].lsn + 1 - new_entry == [lsn |-> new_lsn, epoch |-> prop_state[p].term] - IN - /\ prop_state' = [prop_state EXCEPT ![p].wal = Append(prop_state[p].wal, new_entry)] - /\ UNCHANGED <> - - -\* Write entry new_e to log wal, rolling back all higher entries if e is different. -\* If bump_epoch is TRUE, it means we get record with lsn=vcl and going to update -\* the epoch. Truncate log in this case as well, as we might have correct <= vcl -\* part and some outdated entries behind it which we want to purge before -\* declaring us as recovered. Another way to accomplish this (in previous commit) -\* is wait for first-entry-from-new-epoch before bumping it. -WriteEntry(wal, new_e, bump_epoch) == - (new_e.lsn :> new_e) @@ - \* If wal has entry with such lsn and it is different, truncate all higher log. 
- IF \/ (new_e.lsn \in DOMAIN wal /\ wal[new_e.lsn] /= new_e) - \/ bump_epoch THEN - SelectSeq(wal, LAMBDA e: e.lsn < new_e.lsn) - ELSE - wal - - -\* Try to transfer entry from elected proposer p to acceptor a -TransferEntry(p, a) == - /\ prop_state[p].state = "leader" - /\ prop_state[p].term = acc_state[a].term - /\ a \in DOMAIN prop_state[p].next_send_lsn - /\ LET - next_e == NextEntry(p, a) - IN - /\ next_e /= NULL - /\ LET - \* Consider bumping epoch if getting this entry recovers the acceptor, - \* that is, we reach first record behind VCL. - new_epoch == - IF /\ acc_state[a].epoch < prop_state[p].term - /\ next_e.lsn >= prop_state[p].vcl - THEN - prop_state[p].term - ELSE - acc_state[a].epoch - \* Also check whether this entry allows to advance commit_lsn and - \* if so, bump it where possible. Modeling this as separate action - \* significantly bloats the space (5m vs 15m on max_entries=3 max_term=3, - \* so act immediately. - entry_owners == {o \in acceptors: - /\ o /= a - \* only recovered acceptors advance commit_lsn - /\ acc_state[o].epoch = prop_state[p].term - /\ next_e \in FValues(acc_state[o].wal)} \cup {a} - IN - /\ acc_state' = [acc_state EXCEPT ![a].wal = WriteEntry(acc_state[a].wal, next_e, new_epoch /= acc_state[a].epoch), - ![a].epoch = new_epoch] - /\ prop_state' = [prop_state EXCEPT ![p].next_send_lsn[a] = - prop_state[p].next_send_lsn[a] + 1] - /\ commit_lsns' = IF /\ new_epoch = prop_state[p].term - /\ Quorum(entry_owners) - THEN - [acc \in acceptors |-> - IF /\ acc \in entry_owners - /\ next_e.lsn > commit_lsns[acc] - THEN - next_e.lsn - ELSE - commit_lsns[acc]] - ELSE - commit_lsns - - -\******************************************************************************* -\* Final spec -\******************************************************************************* - -Next == - \/ \E q \in Quorums: \E p \in proposers: RestartProposer(p, q) - \/ \E p \in proposers: \E a \in acceptors: Vote(p, a) - \/ \E p \in proposers: BecomeLeader(p) - \/ \E p \in proposers: \E a \in acceptors: UpdateTerm(p, a) - \/ \E p \in proposers: \E a \in acceptors: HandshakeWithLeader(p, a) - \/ \E p \in proposers: NewEntry(p) - \/ \E p \in proposers: \E a \in acceptors: TransferEntry(p, a) - -Spec == Init /\ [][Next]_<> - - -\******************************************************************************** -\* Invariants -\******************************************************************************** - -\* we don't track history, but this property is fairly convincing anyway -ElectionSafety == - \A p1, p2 \in proposers: - (/\ prop_state[p1].state = "leader" - /\ prop_state[p2].state = "leader" - /\ prop_state[p1].term = prop_state[p2].term) => (p1 = p2) - -LogIsMonotonic == - \A a \in acceptors: - \A i \in DOMAIN acc_state[a].wal: \A j \in DOMAIN acc_state[a].wal: - (i > j) => (/\ acc_state[a].wal[i].lsn > acc_state[a].wal[j].lsn - /\ acc_state[a].wal[i].epoch >= acc_state[a].wal[j].epoch) - -\* Main invariant: log under commit_lsn must match everywhere. -LogSafety == - \A a1 \in acceptors: \A a2 \in acceptors: - LET - common_len == Min(commit_lsns[a1], commit_lsns[a2]) - IN - SubSeq(acc_state[a1].wal, 1, common_len) = SubSeq(acc_state[a2].wal, 1, common_len) - -\* Next record we are going to push to acceptor must never overwrite committed -\* different record. 
-CommittedNotOverwritten == - \A p \in proposers: \A a \in acceptors: - (/\ prop_state[p].state = "leader" - /\ prop_state[p].term = acc_state[a].term - /\ a \in DOMAIN prop_state[p].next_send_lsn) => - LET - next_e == NextEntry(p, a) - IN - (next_e /= NULL) => - ((commit_lsns[a] >= next_e.lsn) => (acc_state[a].wal[next_e.lsn] = next_e)) - - -==== \ No newline at end of file diff --git a/safekeeper/spec/ProposerAcceptorStatic.tla b/safekeeper/spec/ProposerAcceptorStatic.tla new file mode 100644 index 000000000000..b2d2f005dba8 --- /dev/null +++ b/safekeeper/spec/ProposerAcceptorStatic.tla @@ -0,0 +1,449 @@ +---- MODULE ProposerAcceptorStatic ---- + +(* + The protocol is very similar to Raft. The key differences are: + - Leaders (proposers) are separated from storage nodes (acceptors), which has + been already an established way to think about Paxos. + - We don't want to stamp each log record with term, so instead carry around + term histories which are sequences of pairs. + As a bonus (and subtlety) this allows the proposer to commit entries from + previous terms without writing new records -- if acceptor's log is caught + up, update of term history on it updates last_log_term as well. +*) + +\* Model simplifications: +\* - Instant message delivery. Notably, ProposerElected message (TruncateWal action) is not +\* delayed, so we don't attempt to truncate WAL when the same wp already appended something +\* on the acceptor since common point had been calculated (this should be rejected). +\* - old WAL is immediately copied to proposer on its election, without on-demand fetch later. + +\* Some ideas how to break it to play around to get a feeling: +\* - replace Quorums with BadQuorums. +\* - remove 'don't commit entries from previous terms separately' rule in +\* CommitEntries and observe figure 8 from the raft paper. +\* With p2a3t4l4 32 steps error was found in 1h on 80 cores. + +EXTENDS Integers, Sequences, FiniteSets, TLC + +VARIABLES + prop_state, \* prop_state[p] is state of proposer p + acc_state, \* acc_state[a] is state of acceptor a + committed, \* bag (set) of ever committed <> entries + elected_history \* counter for elected terms, see TypeOk for details + +CONSTANT + acceptors, + proposers + +CONSTANT NULL + +\******************************************************************************** +\* Helpers +\******************************************************************************** + +Maximum(S) == + (*************************************************************************) + (* If S is a set of numbers, then this define Maximum(S) to be the *) + (* maximum of those numbers, or -1 if S is empty. *) + (*************************************************************************) + IF S = {} THEN -1 ELSE CHOOSE n \in S : \A m \in S : n \geq m + +\* minimum of numbers in the set, error if set is empty +Minimum(S) == CHOOSE min \in S : \A n \in S : min <= n + +\* Min of two numbers +Min(a, b) == IF a < b THEN a ELSE b + +\* Sort of 0 for functions +EmptyF == [x \in {} |-> 42] +IsEmptyF(f) == DOMAIN f = {} + +\* Set of values (image) of the function f. Apparently no such builtin. +Range(f) == {f[x] : x \in DOMAIN f} + +\* If key k is in function f, map it using l, otherwise insert v. Returns the +\* updated function. +Upsert(f, k, v, l(_)) == + LET new_val == IF k \in DOMAIN f THEN l(f[k]) ELSE v IN + (k :> new_val) @@ f + +\***************** + +NumAccs == Cardinality(acceptors) + +\* does acc_set form the quorum? 
+Quorum(acc_set) == Cardinality(acc_set) >= (NumAccs \div 2 + 1) +\* all quorums of acceptors +Quorums == {subset \in SUBSET acceptors: Quorum(subset)} + +\* For substituting Quorums and seeing what happens. +BadQuorum(acc_set) == Cardinality(acc_set) >= (NumAccs \div 2) +BadQuorums == {subset \in SUBSET acceptors: BadQuorum(subset)} + +\* flushLsn (end of WAL, i.e. index of next entry) of acceptor a. +FlushLsn(a) == Len(acc_state[a].wal) + 1 + +\* Typedefs. Note that TLA+ Nat includes zero. +Terms == Nat +Lsns == Nat + +\******************************************************************************** +\* Type assertion +\******************************************************************************** +\* Defining sets of all possible tuples and using them in TypeOk in usual +\* all-tuples constructor is not practical because such definitions force +\* TLC to enumerate them, while they are are horribly enormous +\* (TLC screams "Attempted to construct a set with too many elements"). +\* So instead check types manually. + + +\* Term history is a sequence of pairs. +IsTermHistory(th) == + \A th_entry \in Range(th): th_entry.term \in Terms /\ th_entry.lsn \in Lsns + +IsWal(w) == + \A i \in DOMAIN w: + /\ i \in Lsns + /\ w[i] \in Terms + +TypeOk == + /\ \A p \in proposers: + \* '_' in field names hinders pretty printing + \* https://github.com/tlaplus/tlaplus/issues/1051 + \* so use camel case. + /\ DOMAIN prop_state[p] = {"state", "term", "votes", "termHistory", "wal", "nextSendLsn"} + \* In campaign proposer sends RequestVote and waits for acks; + \* in leader he is elected. + /\ prop_state[p].state \in {"campaign", "leader"} + \* term for which it will campaign, or won term in leader state + /\ prop_state[p].term \in Terms + \* votes received + /\ \A voter \in DOMAIN prop_state[p].votes: voter \in acceptors + /\ \A vote \in Range(prop_state[p].votes): + /\ IsTermHistory(vote.termHistory) + /\ vote.flushLsn \in Lsns + \* Proposer's term history. Empty while proposer is in "campaign". + /\ IsTermHistory(prop_state[p].termHistory) + \* In the model we identify WAL entries only by pairs + \* without additional unique id, which is enough for its purposes. + \* It means that with term history fully modeled wal becomes + \* redundant as it can be computed from term history + WAL length. + \* However, we still keep it here and at acceptors as explicit sequence + \* where index is LSN and value is the term to avoid artificial mapping to + \* figure out real entries. It shouldn't bloat model much because this + \* doesn't increase number of distinct states. + /\ IsWal(prop_state[p].wal) + \* Map of acceptor -> next lsn to send. It is set when truncate_wal is + \* done so sending entries is allowed only after that. In the impl TCP + \* ensures this ordering. + /\ \A a \in DOMAIN prop_state[p].nextSendLsn: + /\ a \in acceptors + /\ prop_state[p].nextSendLsn[a] \in Lsns + /\ \A a \in acceptors: + /\ DOMAIN acc_state[a] = {"term", "termHistory", "wal"} + /\ acc_state[a].term \in Terms + /\ IsTermHistory(acc_state[a].termHistory) + /\ IsWal(acc_state[a].wal) + /\ \A c \in committed: + /\ c.term \in Terms + /\ c.lsn \in Lsns + \* elected_history is a retrospective map of term -> number of times it was + \* elected, for use in ElectionSafetyFull invariant. For static spec it is + \* fairly convincing that it holds, but with membership change it is less + \* trivial. 
And as we identify log entries only with <term, lsn>, importance
+\* of it is quite high as violation of log safety might go undetected if
+\* election safety is violated. Note though that this is not always the
+\* case, i.e. you can imagine (and TLC should find) schedule where log
+\* safety violation is still detected because two leaders with the same term
+\* commit histories which are different in previous terms, so it is not that
+\* crucial. Plus if spec allows ElectionSafetyFull violation, likely
+\* ElectionSafety will also be violated in some schedules. But neither it
+\* should bloat the model too much.
+  /\ \A term \in DOMAIN elected_history:
+    /\ term \in Terms
+    /\ elected_history[term] \in Nat
+
+\********************************************************************************
+\* Initial
+\********************************************************************************
+
+Init ==
+  /\ prop_state = [p \in proposers |-> [
+       state |-> "campaign",
+       term |-> 1,
+       votes |-> EmptyF,
+       termHistory |-> << >>,
+       wal |-> << >>,
+       nextSendLsn |-> EmptyF
+     ]]
+  /\ acc_state = [a \in acceptors |-> [
+       \* There will be no leader in zero term, 1 is the first
+       \* real.
+       term |-> 0,
+       \* Again, leader in term 0 doesn't exist, but we initialize
+       \* term histories with it to always have common point in
+       \* them. Lsn is 1 because TLA+ sequences are indexed from 1
+       \* (we don't want to truncate WAL out of range).
+       termHistory |-> << [term |-> 0, lsn |-> 1] >>,
+       wal |-> << >>
+     ]]
+  /\ committed = {}
+  /\ elected_history = EmptyF
+
+
+\********************************************************************************
+\* Actions
+\********************************************************************************
+
+\* Proposer loses all state.
+\* For simplicity (and to reduct state space), we assume it immediately gets
+\* current state from quorum q of acceptors determining the term he will request
+\* to vote for.
+RestartProposer(p, q) ==
+  /\ Quorum(q)
+  /\ LET new_term == Maximum({acc_state[a].term : a \in q}) + 1 IN
+       /\ prop_state' = [prop_state EXCEPT ![p].state = "campaign",
+                                           ![p].term = new_term,
+                                           ![p].votes = EmptyF,
+                                           ![p].termHistory = << >>,
+                                           ![p].wal = << >>,
+                                           ![p].nextSendLsn = EmptyF]
+       /\ UNCHANGED <<acc_state, committed, elected_history>>
+
+\* Term history of acceptor a's WAL: the one saved truncated to contain only <=
+\* local FlushLsn entries.
+AcceptorTermHistory(a) ==
+  SelectSeq(acc_state[a].termHistory, LAMBDA th_entry: th_entry.lsn <= FlushLsn(a))
+
+\* Acceptor a immediately votes for proposer p.
+Vote(p, a) ==
+  /\ prop_state[p].state = "campaign"
+  /\ acc_state[a].term < prop_state[p].term \* main voting condition
+  /\ acc_state' = [acc_state EXCEPT ![a].term = prop_state[p].term]
+  /\ LET
+       vote == [termHistory |-> AcceptorTermHistory(a), flushLsn |-> FlushLsn(a)]
+     IN
+       prop_state' = [prop_state EXCEPT ![p].votes = (a :> vote) @@ prop_state[p].votes]
+  /\ UNCHANGED <<committed, elected_history>>
+
+
+\* Get lastLogTerm from term history th.
+LastLogTerm(th) == th[Len(th)].term
+
+\* Proposer p gets elected.
+BecomeLeader(p) ==
+  /\ prop_state[p].state = "campaign"
+  /\ Quorum(DOMAIN prop_state[p].votes)
+  /\ LET
+       \* Find acceptor with the highest vote.
+
+\* Proposer p gets elected.
+BecomeLeader(p) ==
+  /\ prop_state[p].state = "campaign"
+  /\ Quorum(DOMAIN prop_state[p].votes)
+  /\ LET
+       \* Find the acceptor with the highest vote.
+       max_vote_acc ==
+         CHOOSE a \in DOMAIN prop_state[p].votes:
+           LET v == prop_state[p].votes[a]
+           IN \A v2 \in Range(prop_state[p].votes):
+                /\ LastLogTerm(v.termHistory) >= LastLogTerm(v2.termHistory)
+                /\ (LastLogTerm(v.termHistory) = LastLogTerm(v2.termHistory) => v.flushLsn >= v2.flushLsn)
+       max_vote == prop_state[p].votes[max_vote_acc]
+       prop_th == Append(max_vote.termHistory, [term |-> prop_state[p].term, lsn |-> max_vote.flushLsn])
+     IN
+       \* We copy all log preceding the proposer's term from the max vote
+       \* node, so we make sure it is still on the same term as us. This is a
+       \* model simplification which can be removed; in the impl we fetch WAL
+       \* on demand later, from a safekeeper which has it. Note though that in
+       \* case of on-demand fetch we must check on the donor not only the term
+       \* match, but also that truncate_wal has already been done (if it is
+       \* not max_vote_acc).
+       /\ acc_state[max_vote_acc].term = prop_state[p].term
+       /\ prop_state' = [prop_state EXCEPT ![p].state = "leader",
+                                           ![p].termHistory = prop_th,
+                                           ![p].wal = acc_state[max_vote_acc].wal
+                        ]
+       /\ elected_history' = Upsert(elected_history, prop_state[p].term, 1, LAMBDA c: c + 1)
+  /\ UNCHANGED <<acc_state, committed>>
+
+
+\* Acceptor a learns about elected proposer p's term. In the impl this matches
+\* the VoteRequest/VoteResponse exchange when the leader is already elected and
+\* is not interested in the vote result.
+UpdateTerm(p, a) ==
+  /\ prop_state[p].state = "leader"
+  /\ acc_state[a].term < prop_state[p].term
+  /\ acc_state' = [acc_state EXCEPT ![a].term = prop_state[p].term]
+  /\ UNCHANGED <<prop_state, committed, elected_history>>
+
+\* Find the highest common point (LSN of the first divergent record) in the
+\* logs of proposer p and acceptor a. Returns the <term, lsn> of the highest
+\* common point.
+FindHighestCommonPoint(prop_th, acc_th, acc_flush_lsn) ==
+  LET
+    \* First find the index of the highest common term.
+    \* It must exist because term histories are initialized with
+    \* [term |-> 0, lsn |-> 1].
+    last_common_idx == Maximum({i \in 1..Min(Len(prop_th), Len(acc_th)): prop_th[i].term = acc_th[i].term})
+    last_common_term == prop_th[last_common_idx].term
+    \* Now find where it ends at both prop and acc and take min. End of term
+    \* is the start of the next unless it is the last one; there it is
+    \* flush_lsn in case of the acceptor. In case of the proposer it is the
+    \* current writing position, but it can't be less than flush_lsn, so we
+    \* take flush_lsn.
+    acc_common_term_end == IF last_common_idx = Len(acc_th) THEN acc_flush_lsn ELSE acc_th[last_common_idx + 1].lsn
+    prop_common_term_end == IF last_common_idx = Len(prop_th) THEN acc_flush_lsn ELSE prop_th[last_common_idx + 1].lsn
+  IN
+    [term |-> last_common_term, lsn |-> Min(acc_common_term_end, prop_common_term_end)]
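+
+\* A worked example (illustration only; not part of the spec): take
+\*   prop_th       = <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>
+\*   acc_th        = <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>
+\*   acc_flush_lsn = 3
+\* i.e. a leader elected in term 2 off an empty log meets an acceptor holding
+\* two entries from term 1. The terms match only at index 1, so
+\* last_common_idx = 1 and last_common_term = 0; both term ends are at lsn 1,
+\* so the result is [term |-> 0, lsn |-> 1], and TruncateWal below removes
+\* both term-1 entries.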
+
+\* Elected proposer p immediately truncates WAL (and term history) of acceptor
+\* a before starting streaming. Establishes nextSendLsn for a.
+\*
+\* In the impl this happens at each reconnection; here we also allow doing it
+\* multiple times.
+TruncateWal(p, a) ==
+  /\ prop_state[p].state = "leader"
+  /\ acc_state[a].term = prop_state[p].term
+  /\ LET
+       hcp == FindHighestCommonPoint(prop_state[p].termHistory, AcceptorTermHistory(a), FlushLsn(a))
+       next_send_lsn == (a :> hcp.lsn) @@ prop_state[p].nextSendLsn
+     IN
+       \* Acceptor persists the full history immediately; reads adjust it to
+       \* the really existing wal with AcceptorTermHistory.
+       /\ acc_state' = [acc_state EXCEPT ![a].termHistory = prop_state[p].termHistory,
+                                         \* note: SubSeq is inclusive, hence -1.
+                                         ![a].wal = SubSeq(acc_state[a].wal, 1, hcp.lsn - 1)
+                       ]
+       /\ prop_state' = [prop_state EXCEPT ![p].nextSendLsn = next_send_lsn]
+  /\ UNCHANGED <<committed, elected_history>>
+
+\* Elected proposer p appends a new log entry.
+NewEntry(p) ==
+  /\ prop_state[p].state = "leader"
+  /\ LET
+       \* The entry consists only of the term; the index serves as LSN.
+       new_entry == prop_state[p].term
+     IN
+       /\ prop_state' = [prop_state EXCEPT ![p].wal = Append(prop_state[p].wal, new_entry)]
+       /\ UNCHANGED <<acc_state, committed, elected_history>>
+
+\* Immediately append the next entry from elected proposer p to acceptor a.
+AppendEntry(p, a) ==
+  /\ prop_state[p].state = "leader"
+  /\ acc_state[a].term = prop_state[p].term
+  /\ a \in DOMAIN prop_state[p].nextSendLsn \* did TruncateWal
+  /\ prop_state[p].nextSendLsn[a] <= Len(prop_state[p].wal) \* have something to send
+  /\ LET
+       send_lsn == prop_state[p].nextSendLsn[a]
+       entry == prop_state[p].wal[send_lsn]
+       \* Since message delivery is instant we don't check that send_lsn
+       \* follows the last acc record, it must always be true.
+     IN
+       /\ prop_state' = [prop_state EXCEPT ![p].nextSendLsn[a] = send_lsn + 1]
+       /\ acc_state' = [acc_state EXCEPT ![a].wal = Append(acc_state[a].wal, entry)]
+  /\ UNCHANGED <<committed, elected_history>>
+
+\* LSN where elected proposer p starts writing its records.
+PropStartLsn(p) ==
+  IF prop_state[p].state = "leader" THEN prop_state[p].termHistory[Len(prop_state[p].termHistory)].lsn ELSE NULL
+
+\* Proposer p commits all entries it can using quorum q. Note that unlike
+\* will62794/logless-reconfig this allows committing entries from previous
+\* terms (when the conditions for that are met).
+CommitEntries(p, q) ==
+  /\ prop_state[p].state = "leader"
+  /\ \A a \in q:
+     /\ acc_state[a].term = prop_state[p].term
+     \* nextSendLsn existence means TruncateWal has happened; it ensures the
+     \* acceptor's WAL (and FlushLsn) are from the proper proposer's history.
+     \* Alternatively we could compare LastLogTerm here, but that's closer to
+     \* what we do in the impl (we check flushLsn in AppendResponse, but
+     \* AppendRequest is processed only if HandleElected handling was good).
+     /\ a \in DOMAIN prop_state[p].nextSendLsn
+  \* Now find the LSN present on all the quorum.
+  /\ LET quorum_lsn == Minimum({FlushLsn(a): a \in q}) IN
+     \* This is the basic Raft rule of not committing entries from previous
+     \* terms except along with a current term entry (commit them only when
+     \* the quorum recovers, i.e. last_log_term on it reaches the leader's
+     \* term).
+     /\ quorum_lsn >= PropStartLsn(p)
+     /\ committed' = committed \cup {[term |-> prop_state[p].wal[lsn], lsn |-> lsn]: lsn \in 1..(quorum_lsn - 1)}
+  /\ UNCHANGED <<prop_state, acc_state, elected_history>>
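+
+\* For intuition (illustration only; not part of the spec): suppose a leader
+\* of term 3 was elected with termHistory ending in [term |-> 3, lsn |-> 3],
+\* so PropStartLsn = 3, and its WAL holds two term-1 entries. A quorum that
+\* has flushed only the first entry gives quorum_lsn = 2 < 3, so nothing
+\* commits; once the quorum has adopted the leader's history via TruncateWal
+\* and flushed up to lsn >= 3, the older entries commit together. The t4_l4
+\* model log below shows LogSafety breaking when this check is disabled.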
+
+\*******************************************************************************
+\* Final spec
+\*******************************************************************************
+
+Next ==
+  \/ \E q \in Quorums: \E p \in proposers: RestartProposer(p, q)
+  \/ \E p \in proposers: \E a \in acceptors: Vote(p, a)
+  \/ \E p \in proposers: BecomeLeader(p)
+  \/ \E p \in proposers: \E a \in acceptors: UpdateTerm(p, a)
+  \/ \E p \in proposers: \E a \in acceptors: TruncateWal(p, a)
+  \/ \E p \in proposers: NewEntry(p)
+  \/ \E p \in proposers: \E a \in acceptors: AppendEntry(p, a)
+  \/ \E q \in Quorums: \E p \in proposers: CommitEntries(p, q)
+
+Spec == Init /\ [][Next]_<<prop_state, acc_state, committed, elected_history>>
+
+
+\********************************************************************************
+\* Invariants
+\********************************************************************************
+
+\* Lighter version of ElectionSafetyFull which doesn't require elected_history.
+ElectionSafety ==
+  \A p1, p2 \in proposers:
+    (/\ prop_state[p1].state = "leader"
+     /\ prop_state[p2].state = "leader"
+     /\ prop_state[p1].term = prop_state[p2].term) => (p1 = p2)
+
+\* A single term must never be elected more than once.
+ElectionSafetyFull == \A term \in DOMAIN elected_history: elected_history[term] <= 1
+
+\* Log is expected to be monotonic by term comparison. This is not true
+\* in variants of multi Paxos, but in Raft (and here) it is.
+LogIsMonotonic ==
+  \A a \in acceptors:
+    \A i, j \in DOMAIN acc_state[a].wal:
+      (i > j) => (acc_state[a].wal[i] >= acc_state[a].wal[j])
+
+\* Main invariant: if two entries are committed at the same LSN, they must be
+\* the same entry.
+LogSafety ==
+  \A c1, c2 \in committed: (c1.lsn = c2.lsn) => (c1 = c2)
+
+
+\********************************************************************************
+\* Invariants which don't need to hold, but useful for playing/debugging.
+\********************************************************************************
+
+\* Limits the term of elected proposers.
+MaxTerm == \A p \in proposers: (prop_state[p].state = "leader" => prop_state[p].term < 2)
+
+MaxAccWalLen == \A a \in acceptors: Len(acc_state[a].wal) < 2
+
+\* Limits the max number of committed entries. That way we can check that
+\* we're actually committing something.
+MaxCommitLsn == Cardinality(committed) < 2
+
+\* How many records with different terms can be removed in a single WAL
+\* truncation.
+MaxTruncatedTerms ==
+  \A p \in proposers: \A a \in acceptors:
+    (/\ prop_state[p].state = "leader"
+     /\ prop_state[p].term = acc_state[a].term) =>
+    LET
+      hcp == FindHighestCommonPoint(prop_state[p].termHistory, AcceptorTermHistory(a), FlushLsn(a))
+      truncated_lsns == {lsn \in DOMAIN acc_state[a].wal: lsn >= hcp.lsn}
+      truncated_records_terms == {acc_state[a].wal[lsn]: lsn \in truncated_lsns}
+    IN
+      Cardinality(truncated_records_terms) < 2
+
+\* Check that TruncateWal never deletes a committed record.
+\* It might seem that this should be an invariant, but it is not.
+\* With 5 nodes, it is legit to truncate a record which had been
+\* globally committed: e.g. nodes abc can commit a record of term 1 in
+\* term 3, and after that the leader of term 2 can delete such a record
+\* on d. On 10 cores TLC can find such a trace in ~7 hours.
+CommittedNotTruncated ==
+  \A p \in proposers: \A a \in acceptors:
+    (/\ prop_state[p].state = "leader"
+     /\ prop_state[p].term = acc_state[a].term) =>
+    LET
+      hcp == FindHighestCommonPoint(prop_state[p].termHistory, AcceptorTermHistory(a), FlushLsn(a))
+      truncated_lsns == {lsn \in DOMAIN acc_state[a].wal: lsn >= hcp.lsn}
+      truncated_records == {[term |-> acc_state[a].wal[lsn], lsn |-> lsn]: lsn \in truncated_lsns}
+    IN
+      \A r \in truncated_records: r \notin committed
+
+====
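The model configs below reference StateConstraint and Alias, which live in MCProposerAcceptorStatic.tla and are not part of this patch. A minimal sketch of what such a constraint might look like, assuming the max_term and max_entries constants declared in the .cfg files (hypothetical; the real MC file may differ):

    StateConstraint ==
        \* Hypothetical bound on the explored state space, keyed off the
        \* model constants; not the actual contents of the MC file.
        /\ \A p \in proposers: prop_state[p].term <= max_term
        /\ \A p \in proposers: Len(prop_state[p].wal) <= max_entries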
diff --git a/safekeeper/spec/modelcheck.sh b/safekeeper/spec/modelcheck.sh
new file mode 100755
index 000000000000..21ead7dad860
--- /dev/null
+++ b/safekeeper/spec/modelcheck.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Usage: ./modelcheck.sh <config> <spec>, e.g.
+# ./modelcheck.sh models/MCProposerAcceptorStatic_p2_a3_t3_l3.cfg MCProposerAcceptorStatic.tla
+CONFIG=$1
+SPEC=$2
+
+MEM=7G
+TOOLSPATH="/opt/TLA+Toolbox/tla2tools.jar"
+
+mkdir -p "tlc-results"
+CONFIG_FILE=$(basename -- "$CONFIG")
+outfilename="$SPEC-${CONFIG_FILE}-$(date --utc +%Y-%m-%d--%H-%M-%S).log"
+outfile="tlc-results/$outfilename"
+touch "$outfile"
+
+# Save some info about the run.
+GIT_REV=$(git rev-parse --short HEAD)
+INFO=$(uname -a)
+
+# First for Linux, second for Mac.
+CPUNAMELinux=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1')
+CPUCORESLinux=$(nproc)
+CPUNAMEMac=$(sysctl -n machdep.cpu.brand_string)
+CPUCORESMac=$(sysctl -n machdep.cpu.thread_count)
+
+echo "git revision: $GIT_REV" >> "$outfile"
+echo "Platform: $INFO" >> "$outfile"
+echo "CPU Info Linux: $CPUNAMELinux" >> "$outfile"
+echo "CPU Cores Linux: $CPUCORESLinux" >> "$outfile"
+echo "CPU Info Mac: $CPUNAMEMac" >> "$outfile"
+echo "CPU Cores Mac: $CPUCORESMac" >> "$outfile"
+echo "Spec: $SPEC" >> "$outfile"
+echo "Config: $CONFIG" >> "$outfile"
+echo "----" >> "$outfile"
+cat "$CONFIG" >> "$outfile"
+echo "" >> "$outfile"
+echo "----" >> "$outfile"
+echo "" >> "$outfile"
+
+# see
+# https://lamport.azurewebsites.net/tla/current-tools.pdf
+# for TLC options.
+# OffHeapDiskFPSet is the optimal fingerprint set implementation
+# https://docs.tlapl.us/codebase:architecture#fingerprint_sets_fpsets
+#
+# Add -simulate to run in infinite simulation mode.
+java -Xmx$MEM -XX:MaxDirectMemorySize=$MEM -XX:+UseParallelGC -Dtlc2.tool.fp.FPSet.impl=tlc2.tool.fp.OffHeapDiskFPSet \
+  -cp "${TOOLSPATH}" tlc2.TLC "$SPEC" -config "$CONFIG" -workers auto -gzip | tee -a "$outfile"
diff --git a/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t2_l2.cfg b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t2_l2.cfg
new file mode 100644
index 000000000000..c06109c60110
--- /dev/null
+++ b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t2_l2.cfg
@@ -0,0 +1,19 @@
+\* A very small model just to play.
+CONSTANTS
+NULL = NULL
+proposers = {p1, p2}
+acceptors = {a1, a2, a3}
+max_term = 2
+max_entries = 2
+SPECIFICATION Spec
+CONSTRAINT StateConstraint
+INVARIANT
+TypeOk
+ElectionSafetyFull
+LogIsMonotonic
+LogSafety
+CommittedNotTruncated
+SYMMETRY ProposerAcceptorSymmetry
+CHECK_DEADLOCK FALSE
+ALIAS Alias
+
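For reference, the model file names encode their parameters: p2_a3_t2_l2 above means 2 proposers, 3 acceptors, max_term = 2 and max_entries = 2, matching its CONSTANTS block. The larger models below scale these up; the 5-acceptor ones drop CommittedNotTruncated, which, as the comment in the spec notes, does not hold with 5 nodes.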
diff --git a/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t3_l2.cfg b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t3_l2.cfg
new file mode 100644
index 000000000000..5d10fa960f06
--- /dev/null
+++ b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t3_l2.cfg
@@ -0,0 +1,19 @@
+\* A model next to the smallest one.
+CONSTANTS
+NULL = NULL
+proposers = {p1, p2}
+acceptors = {a1, a2, a3}
+max_term = 3
+max_entries = 2
+SPECIFICATION Spec
+CONSTRAINT StateConstraint
+INVARIANT
+TypeOk
+ElectionSafetyFull
+LogIsMonotonic
+LogSafety
+CommittedNotTruncated
+SYMMETRY ProposerAcceptorSymmetry
+CHECK_DEADLOCK FALSE
+ALIAS Alias
+
diff --git a/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t3_l3.cfg b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t3_l3.cfg
new file mode 100644
index 000000000000..8ba8ce95a410
--- /dev/null
+++ b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t3_l3.cfg
@@ -0,0 +1,17 @@
+CONSTANTS
+NULL = NULL
+proposers = {p1, p2}
+acceptors = {a1, a2, a3}
+max_term = 3
+max_entries = 3
+SPECIFICATION Spec
+CONSTRAINT StateConstraint
+INVARIANT
+TypeOk
+ElectionSafety
+LogIsMonotonic
+LogSafety
+CommittedNotTruncated
+SYMMETRY ProposerAcceptorSymmetry
+CHECK_DEADLOCK FALSE
+ALIAS Alias
diff --git a/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t4_l4.cfg b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t4_l4.cfg
new file mode 100644
index 000000000000..4763a34ec410
--- /dev/null
+++ b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t4_l4.cfg
@@ -0,0 +1,17 @@
+CONSTANTS
+NULL = NULL
+proposers = {p1, p2}
+acceptors = {a1, a2, a3}
+max_term = 4
+max_entries = 4
+SPECIFICATION Spec
+CONSTRAINT StateConstraint
+INVARIANT
+TypeOk
+ElectionSafety
+LogIsMonotonic
+LogSafety
+CommittedNotTruncated
+SYMMETRY ProposerAcceptorSymmetry
+CHECK_DEADLOCK FALSE
+ALIAS Alias
diff --git a/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a5_t2_l2.cfg b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a5_t2_l2.cfg
new file mode 100644
index 000000000000..ebf4724633fd
--- /dev/null
+++ b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a5_t2_l2.cfg
@@ -0,0 +1,16 @@
+CONSTANTS
+NULL = NULL
+proposers = {p1, p2}
+acceptors = {a1, a2, a3, a4, a5}
+max_term = 2
+max_entries = 2
+SPECIFICATION Spec
+CONSTRAINT StateConstraint
+INVARIANT
+TypeOk
+ElectionSafety
+LogIsMonotonic
+LogSafety
+SYMMETRY ProposerAcceptorSymmetry
+CHECK_DEADLOCK FALSE
+ALIAS Alias
diff --git a/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a5_t3_l3.cfg b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a5_t3_l3.cfg
new file mode 100644
index 000000000000..bb77350c58a1
--- /dev/null
+++ b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a5_t3_l3.cfg
@@ -0,0 +1,16 @@
+CONSTANTS
+NULL = NULL
+proposers = {p1, p2}
+acceptors = {a1, a2, a3, a4, a5}
+max_term = 3
+max_entries = 3
+SPECIFICATION Spec
+CONSTRAINT StateConstraint
+INVARIANT
+TypeOk
+ElectionSafety
+LogIsMonotonic
+LogSafety
+SYMMETRY ProposerAcceptorSymmetry
+CHECK_DEADLOCK FALSE
+ALIAS Alias
diff --git a/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a5_t4_l3.cfg b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a5_t4_l3.cfg
new file mode 100644
index 000000000000..9a5e142f9925
--- /dev/null
+++ b/safekeeper/spec/models/MCProposerAcceptorStatic_p2_a5_t4_l3.cfg
@@ -0,0 +1,16 @@
+CONSTANTS
+NULL = NULL
+proposers = {p1, p2}
+acceptors = {a1, a2, a3, a4, a5}
+max_term = 4
+max_entries = 3
+SPECIFICATION Spec
+CONSTRAINT StateConstraint
+INVARIANT
+TypeOk
+ElectionSafety
+LogIsMonotonic
+LogSafety
+SYMMETRY ProposerAcceptorSymmetry
+CHECK_DEADLOCK FALSE
+ALIAS Alias
diff --git a/safekeeper/spec/readme.md b/safekeeper/spec/readme.md
new file mode 100644
index 000000000000..ec2649d87da5
--- /dev/null
+++ b/safekeeper/spec/readme.md
@@ -0,0 +1,12 @@
+The specifications, models, and results of running them, for the compute <->
+safekeepers consensus algorithm for committing WAL on the fleet of safekeepers.
+Following Paxos parlance, the compute which writes WAL is called the (WAL) proposer here,
+and the safekeepers which persist it are called (WAL) acceptors.
+
+Directory structure:
+- Use modelcheck.sh to run TLC.
+- MC*.tla files contain the bits of TLA+ needed for TLC, like constraining the state space; models/ contains the actual models.
+- Other .tla files are the actual specs.
+
+The structure is partially borrowed from
+[logless-reconfig](https://github.com/will62794/logless-reconfig), thanks to its authors.
diff --git a/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t2_l2.cfg-2024-11-06--13-44-17.log b/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t2_l2.cfg-2024-11-06--13-44-17.log
new file mode 100644
index 000000000000..768722b1eb41
--- /dev/null
+++ b/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t2_l2.cfg-2024-11-06--13-44-17.log
@@ -0,0 +1,63 @@
+git revision: 864f4667d
+Platform: Linux neon-dev-arm64-1 6.8.0-48-generic #48-Ubuntu SMP PREEMPT_DYNAMIC Fri Sep 27 14:35:45 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux
+CPU Info Linux: Neoverse-N1
+CPU Cores Linux: 80
+CPU Info Mac:
+CPU Cores Mac:
+Spec: MCProposerAcceptorStatic.tla
+Config: models/MCProposerAcceptorStatic_p2_a3_t2_l2.cfg
+----
+\* A very small model just to play.
+CONSTANTS
+NULL = NULL
+proposers = {p1, p2}
+acceptors = {a1, a2, a3}
+max_term = 2
+max_entries = 2
+SPECIFICATION Spec
+CONSTRAINT StateConstraint
+INVARIANT
+TypeOk
+ElectionSafety
+LogIsMonotonic
+LogSafety
+CommittedNotTruncated
+SYMMETRY ProposerAcceptorSymmetry
+CHECK_DEADLOCK FALSE
+ALIAS Alias
+
+
+----
+
+TLC2 Version 2.20 of Day Month 20?? (rev: f68cb71)
+Running breadth-first search Model-Checking with fp 110 and seed 3949669318051689745 with 80 workers on 80 cores with 54613MB heap and 61440MB offheap memory [pid: 46037] (Linux 6.8.0-48-generic aarch64, Ubuntu 21.0.4 x86_64, OffHeapDiskFPSet, DiskStateQueue).
+Parsing file /home/arseny/neon/safekeeper/spec/MCProposerAcceptorStatic.tla
+Parsing file /tmp/tlc-11123278435718411444/TLC.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLC.tla)
+Parsing file /home/arseny/neon/safekeeper/spec/ProposerAcceptorStatic.tla
+Parsing file /tmp/tlc-11123278435718411444/_TLCTrace.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla)
+Parsing file /tmp/tlc-11123278435718411444/Integers.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Integers.tla)
+Parsing file /tmp/tlc-11123278435718411444/Sequences.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla)
+Parsing file /tmp/tlc-11123278435718411444/FiniteSets.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla)
+Parsing file /tmp/tlc-11123278435718411444/Naturals.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla)
+Parsing file /tmp/tlc-11123278435718411444/TLCExt.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla)
+Semantic processing of module Naturals
+Semantic processing of module Sequences
+Semantic processing of module FiniteSets
+Semantic processing of module TLC
+Semantic processing of module Integers
+Semantic processing of module ProposerAcceptorStatic
+Semantic processing of module TLCExt
+Semantic processing of module _TLCTrace
+Semantic processing of module MCProposerAcceptorStatic
+Starting...
(2024-11-06 13:44:18) +Computing initial states... +Finished computing initial states: 1 distinct state generated at 2024-11-06 13:44:20. +Model checking completed. No error has been found. + Estimates of the probability that TLC did not check all reachable states + because two distinct states had the same fingerprint: + calculated (optimistic): val = 2.9E-9 + based on the actual fingerprints: val = 4.1E-10 +922134 states generated, 61249 distinct states found, 0 states left on queue. +The depth of the complete state graph search is 31. +The average outdegree of the complete state graph is 1 (minimum is 0, the maximum 6 and the 95th percentile is 3). +Finished in 11s at (2024-11-06 13:44:28) diff --git a/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t3_l2.cfg-2024-11-15--09-09-58.log b/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t3_l2.cfg-2024-11-15--09-09-58.log new file mode 100644 index 000000000000..ae3ba98da61e --- /dev/null +++ b/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t3_l2.cfg-2024-11-15--09-09-58.log @@ -0,0 +1,69 @@ +git revision: bcbff084a +Platform: Linux nonlibrem 6.10.11-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.10.11-1 (2024-09-22) x86_64 GNU/Linux +CPU Info Linux: 13th Gen Intel(R) Core(TM) i7-1355U +CPU Cores Linux: 10 +CPU Info Mac: +CPU Cores Mac: +Spec: MCProposerAcceptorStatic.tla +Config: models/MCProposerAcceptorStatic_p2_a3_t3_l2.cfg +---- +\* A model next to the smallest one. +CONSTANTS +NULL = NULL +proposers = {p1, p2} +acceptors = {a1, a2, a3} +max_term = 3 +max_entries = 2 +SPECIFICATION Spec +CONSTRAINT StateConstraint +INVARIANT +TypeOk +ElectionSafety +LogIsMonotonic +LogSafety +CommittedNotTruncated +SYMMETRY ProposerAcceptorSymmetry +CHECK_DEADLOCK FALSE +ALIAS Alias + + +---- + +TLC2 Version 2.20 of Day Month 20?? (rev: cc65eef) +Running breadth-first search Model-Checking with fp 41 and seed -3061068726727581619 with 10 workers on 10 cores with 6372MB heap and 7168MB offheap memory [pid: 1250346] (Linux 6.10.11-amd64 amd64, Debian 21.0.5 x86_64, OffHeapDiskFPSet, DiskStateQueue). 
+Parsing file /home/ars/neon/neon/safekeeper/spec/MCProposerAcceptorStatic.tla +Parsing file /tmp/tlc-3023124431504466774/TLC.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/TLC.tla) +Parsing file /home/ars/neon/neon/safekeeper/spec/ProposerAcceptorStatic.tla +Parsing file /tmp/tlc-3023124431504466774/_TLCTrace.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla) +Parsing file /tmp/tlc-3023124431504466774/Integers.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/Integers.tla) +Parsing file /tmp/tlc-3023124431504466774/Sequences.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla) +Parsing file /tmp/tlc-3023124431504466774/FiniteSets.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla) +Parsing file /tmp/tlc-3023124431504466774/Naturals.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla) +Parsing file /tmp/tlc-3023124431504466774/TLCExt.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla) +Semantic processing of module Naturals +Semantic processing of module Sequences +Semantic processing of module FiniteSets +Semantic processing of module TLC +Semantic processing of module Integers +Semantic processing of module ProposerAcceptorStatic +Semantic processing of module TLCExt +Semantic processing of module _TLCTrace +Semantic processing of module MCProposerAcceptorStatic +Starting... (2024-11-15 12:09:59) +Computing initial states... +Finished computing initial states: 1 distinct state generated at 2024-11-15 12:10:00. +Progress(19) at 2024-11-15 12:10:03: 464,696 states generated (464,696 s/min), 57,859 distinct states found (57,859 ds/min), 21,435 states left on queue. +Progress(26) at 2024-11-15 12:11:03: 8,813,399 states generated (8,348,703 s/min), 877,254 distinct states found (819,395 ds/min), 214,794 states left on queue. +Progress(27) at 2024-11-15 12:12:03: 16,121,858 states generated (7,308,459 s/min), 1,464,707 distinct states found (587,453 ds/min), 274,230 states left on queue. +Progress(29) at 2024-11-15 12:13:03: 23,073,903 states generated (6,952,045 s/min), 1,948,802 distinct states found (484,095 ds/min), 263,697 states left on queue. +Progress(31) at 2024-11-15 12:14:03: 29,740,681 states generated (6,666,778 s/min), 2,331,052 distinct states found (382,250 ds/min), 185,484 states left on queue. +Progress(34) at 2024-11-15 12:15:03: 36,085,876 states generated (6,345,195 s/min), 2,602,370 distinct states found (271,318 ds/min), 31,659 states left on queue. +Model checking completed. No error has been found. + Estimates of the probability that TLC did not check all reachable states + because two distinct states had the same fingerprint: + calculated (optimistic): val = 4.9E-6 + based on the actual fingerprints: val = 6.9E-7 +36896322 states generated, 2623542 distinct states found, 0 states left on queue. +The depth of the complete state graph search is 39. +The average outdegree of the complete state graph is 1 (minimum is 0, the maximum 7 and the 95th percentile is 3). 
+Finished in 05min 14s at (2024-11-15 12:15:13) diff --git a/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t3_l3.cfg-2024-11-06--13-03-51.log b/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t3_l3.cfg-2024-11-06--13-03-51.log new file mode 100644 index 000000000000..46f21cee72ce --- /dev/null +++ b/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t3_l3.cfg-2024-11-06--13-03-51.log @@ -0,0 +1,72 @@ +git revision: 864f4667d +Platform: Linux neon-dev-arm64-1 6.8.0-48-generic #48-Ubuntu SMP PREEMPT_DYNAMIC Fri Sep 27 14:35:45 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux +CPU Info Linux: Neoverse-N1 +CPU Cores Linux: 80 +CPU Info Mac: +CPU Cores Mac: +Spec: MCProposerAcceptorStatic.tla +Config: models/MCProposerAcceptorStatic_p2_a3_t3_l3.cfg +---- +CONSTANTS +NULL = NULL +proposers = {p1, p2} +acceptors = {a1, a2, a3} +max_term = 3 +max_entries = 3 +SPECIFICATION Spec +CONSTRAINT StateConstraint +INVARIANT +TypeOk +ElectionSafety +LogIsMonotonic +LogSafety +CommittedNotTruncated +SYMMETRY ProposerAcceptorSymmetry +CHECK_DEADLOCK FALSE +ALIAS Alias + +---- + +TLC2 Version 2.20 of Day Month 20?? (rev: f68cb71) +Running breadth-first search Model-Checking with fp 126 and seed 2302892334567572769 with 80 workers on 80 cores with 54613MB heap and 61440MB offheap memory [pid: 39701] (Linux 6.8.0-48-generic aarch64, Ubuntu 21.0.4 x86_64, OffHeapDiskFPSet, DiskStateQueue). +Parsing file /home/arseny/neon/safekeeper/spec/MCProposerAcceptorStatic.tla +Parsing file /tmp/tlc-15178810317173795942/TLC.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLC.tla) +Parsing file /home/arseny/neon/safekeeper/spec/ProposerAcceptorStatic.tla +Parsing file /tmp/tlc-15178810317173795942/_TLCTrace.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla) +Parsing file /tmp/tlc-15178810317173795942/Integers.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Integers.tla) +Parsing file /tmp/tlc-15178810317173795942/Sequences.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla) +Parsing file /tmp/tlc-15178810317173795942/FiniteSets.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla) +Parsing file /tmp/tlc-15178810317173795942/Naturals.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla) +Parsing file /tmp/tlc-15178810317173795942/TLCExt.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla) +Semantic processing of module Naturals +Semantic processing of module Sequences +Semantic processing of module FiniteSets +Semantic processing of module TLC +Semantic processing of module Integers +Semantic processing of module ProposerAcceptorStatic +Semantic processing of module TLCExt +Semantic processing of module _TLCTrace +Semantic processing of module MCProposerAcceptorStatic +Starting... (2024-11-06 13:03:52) +Computing initial states... +Finished computing initial states: 1 distinct state generated at 2024-11-06 13:03:55. +Progress(21) at 2024-11-06 13:03:58: 846,240 states generated (846,240 s/min), 106,298 distinct states found (106,298 ds/min), 41,028 states left on queue. +Progress(28) at 2024-11-06 13:04:58: 27,538,211 states generated (26,691,971 s/min), 2,768,793 distinct states found (2,662,495 ds/min), 782,984 states left on queue. 
+Progress(30) at 2024-11-06 13:05:58: 54,048,763 states generated (26,510,552 s/min), 5,076,745 distinct states found (2,307,952 ds/min), 1,241,301 states left on queue. +Progress(31) at 2024-11-06 13:06:58: 80,554,724 states generated (26,505,961 s/min), 7,199,201 distinct states found (2,122,456 ds/min), 1,541,574 states left on queue. +Progress(32) at 2024-11-06 13:07:58: 106,991,261 states generated (26,436,537 s/min), 9,121,549 distinct states found (1,922,348 ds/min), 1,686,289 states left on queue. +Progress(33) at 2024-11-06 13:08:58: 133,354,665 states generated (26,363,404 s/min), 10,935,451 distinct states found (1,813,902 ds/min), 1,739,977 states left on queue. +Progress(34) at 2024-11-06 13:09:58: 159,631,385 states generated (26,276,720 s/min), 12,605,372 distinct states found (1,669,921 ds/min), 1,677,447 states left on queue. +Progress(35) at 2024-11-06 13:10:58: 185,862,196 states generated (26,230,811 s/min), 14,138,409 distinct states found (1,533,037 ds/min), 1,501,760 states left on queue. +Progress(36) at 2024-11-06 13:11:58: 212,021,688 states generated (26,159,492 s/min), 15,538,990 distinct states found (1,400,581 ds/min), 1,216,621 states left on queue. +Progress(37) at 2024-11-06 13:12:58: 238,046,160 states generated (26,024,472 s/min), 16,778,583 distinct states found (1,239,593 ds/min), 797,230 states left on queue. +Progress(39) at 2024-11-06 13:13:58: 263,931,163 states generated (25,885,003 s/min), 17,820,786 distinct states found (1,042,203 ds/min), 209,400 states left on queue. +Model checking completed. No error has been found. + Estimates of the probability that TLC did not check all reachable states + because two distinct states had the same fingerprint: + calculated (optimistic): val = 2.5E-4 + based on the actual fingerprints: val = 7.9E-5 +270257170 states generated, 18005639 distinct states found, 0 states left on queue. +The depth of the complete state graph search is 47. +The average outdegree of the complete state graph is 1 (minimum is 0, the maximum 7 and the 95th percentile is 3). +Finished in 10min 25s at (2024-11-06 13:14:17) diff --git a/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t4_l4.cfg-2024-11-06--14-20-25.log b/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t4_l4.cfg-2024-11-06--14-20-25.log new file mode 100644 index 000000000000..c7cc853af0e0 --- /dev/null +++ b/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t4_l4.cfg-2024-11-06--14-20-25.log @@ -0,0 +1,1466 @@ +# Shows LogSafety violation when "don't commit separately entries from previous terms" check is disabled. +git revision: 4f1ee6331 +Platform: Linux neon-dev-arm64-1 6.8.0-48-generic #48-Ubuntu SMP PREEMPT_DYNAMIC Fri Sep 27 14:35:45 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux +CPU Info Linux: Neoverse-N1 +CPU Cores Linux: 80 +CPU Info Mac: +CPU Cores Mac: +Spec: MCProposerAcceptorStatic.tla +Config: models/MCProposerAcceptorStatic_p2_a3_t4_l4.cfg +---- +CONSTANTS +NULL = NULL +proposers = {p1, p2} +acceptors = {a1, a2, a3} +max_term = 4 +max_entries = 4 +SPECIFICATION Spec +CONSTRAINT StateConstraint +INVARIANT +TypeOk +ElectionSafety +LogIsMonotonic +LogSafety +SYMMETRY ProposerAcceptorSymmetry +CHECK_DEADLOCK FALSE +ALIAS Alias + +---- + +TLC2 Version 2.20 of Day Month 20?? 
(rev: f68cb71) +Running breadth-first search Model-Checking with fp 12 and seed -5379034126224420237 with 80 workers on 80 cores with 54613MB heap and 61440MB offheap memory [pid: 52295] (Linux 6.8.0-48-generic aarch64, Ubuntu 21.0.4 x86_64, OffHeapDiskFPSet, DiskStateQueue). +Parsing file /home/arseny/neon/safekeeper/spec/MCProposerAcceptorStatic.tla +Parsing file /tmp/tlc-4533438058229992850/TLC.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLC.tla) +Parsing file /home/arseny/neon/safekeeper/spec/ProposerAcceptorStatic.tla +Parsing file /tmp/tlc-4533438058229992850/_TLCTrace.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla) +Parsing file /tmp/tlc-4533438058229992850/Integers.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Integers.tla) +Parsing file /tmp/tlc-4533438058229992850/Sequences.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla) +Parsing file /tmp/tlc-4533438058229992850/FiniteSets.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla) +Parsing file /tmp/tlc-4533438058229992850/Naturals.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla) +Parsing file /tmp/tlc-4533438058229992850/TLCExt.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla) +Semantic processing of module Naturals +Semantic processing of module Sequences +Semantic processing of module FiniteSets +Semantic processing of module TLC +Semantic processing of module Integers +Semantic processing of module ProposerAcceptorStatic +Semantic processing of module TLCExt +Semantic processing of module _TLCTrace +Semantic processing of module MCProposerAcceptorStatic +Starting... (2024-11-06 14:20:26) +Computing initial states... +Finished computing initial states: 1 distinct state generated at 2024-11-06 14:20:29. +Progress(20) at 2024-11-06 14:20:32: 1,011,898 states generated (1,011,898 s/min), 140,947 distinct states found (140,947 ds/min), 60,535 states left on queue. +Progress(26) at 2024-11-06 14:21:32: 30,146,518 states generated (29,134,620 s/min), 3,742,736 distinct states found (3,601,789 ds/min), 1,438,779 states left on queue. +Progress(27) at 2024-11-06 14:22:32: 59,362,708 states generated (29,216,190 s/min), 7,210,233 distinct states found (3,467,497 ds/min), 2,708,295 states left on queue. +Progress(28) at 2024-11-06 14:23:32: 88,589,291 states generated (29,226,583 s/min), 10,552,781 distinct states found (3,342,548 ds/min), 3,874,296 states left on queue. +Progress(29) at 2024-11-06 14:24:32: 117,894,209 states generated (29,304,918 s/min), 13,932,498 distinct states found (3,379,717 ds/min), 5,069,960 states left on queue. +Progress(29) at 2024-11-06 14:25:32: 147,338,882 states generated (29,444,673 s/min), 17,180,069 distinct states found (3,247,571 ds/min), 6,146,371 states left on queue. +Progress(29) at 2024-11-06 14:26:32: 176,498,135 states generated (29,159,253 s/min), 20,547,926 distinct states found (3,367,857 ds/min), 7,338,835 states left on queue. +Progress(30) at 2024-11-06 14:27:32: 205,957,044 states generated (29,458,909 s/min), 23,661,090 distinct states found (3,113,164 ds/min), 8,293,570 states left on queue. +Progress(30) at 2024-11-06 14:28:32: 235,390,133 states generated (29,433,089 s/min), 26,892,306 distinct states found (3,231,216 ds/min), 9,369,229 states left on queue. 
+Progress(30) at 2024-11-06 14:29:32: 264,571,938 states generated (29,181,805 s/min), 30,176,971 distinct states found (3,284,665 ds/min), 10,493,429 states left on queue. +Progress(31) at 2024-11-06 14:30:32: 293,928,191 states generated (29,356,253 s/min), 33,296,160 distinct states found (3,119,189 ds/min), 11,463,686 states left on queue. +Progress(31) at 2024-11-06 14:31:32: 323,436,668 states generated (29,508,477 s/min), 36,347,973 distinct states found (3,051,813 ds/min), 12,365,578 states left on queue. +Progress(31) at 2024-11-06 14:32:32: 352,943,790 states generated (29,507,122 s/min), 39,465,244 distinct states found (3,117,271 ds/min), 13,349,544 states left on queue. +Progress(31) at 2024-11-06 14:33:32: 382,292,863 states generated (29,349,073 s/min), 42,654,621 distinct states found (3,189,377 ds/min), 14,384,363 states left on queue. +Progress(31) at 2024-11-06 14:34:32: 411,385,854 states generated (29,092,991 s/min), 45,941,145 distinct states found (3,286,524 ds/min), 15,509,450 states left on queue. +Progress(31) at 2024-11-06 14:35:32: 440,738,756 states generated (29,352,902 s/min), 48,984,566 distinct states found (3,043,421 ds/min), 16,419,882 states left on queue. +Progress(32) at 2024-11-06 14:36:32: 470,251,558 states generated (29,512,802 s/min), 51,925,693 distinct states found (2,941,127 ds/min), 17,211,457 states left on queue. +Progress(32) at 2024-11-06 14:37:32: 499,714,013 states generated (29,462,455 s/min), 54,955,581 distinct states found (3,029,888 ds/min), 18,114,624 states left on queue. +Progress(32) at 2024-11-06 14:38:32: 529,254,608 states generated (29,540,595 s/min), 57,938,914 distinct states found (2,983,333 ds/min), 18,996,128 states left on queue. +Progress(32) at 2024-11-06 14:39:32: 558,774,398 states generated (29,519,790 s/min), 61,072,943 distinct states found (3,134,029 ds/min), 19,975,689 states left on queue. +Progress(32) at 2024-11-06 14:40:32: 588,134,665 states generated (29,360,267 s/min), 64,148,888 distinct states found (3,075,945 ds/min), 20,922,407 states left on queue. +Progress(32) at 2024-11-06 14:41:32: 617,464,374 states generated (29,329,709 s/min), 67,306,855 distinct states found (3,157,967 ds/min), 21,928,799 states left on queue. +Progress(32) at 2024-11-06 14:42:32: 646,525,281 states generated (29,060,907 s/min), 70,425,194 distinct states found (3,118,339 ds/min), 22,895,971 states left on queue. +Progress(32) at 2024-11-06 14:43:32: 676,054,893 states generated (29,529,612 s/min), 73,351,905 distinct states found (2,926,711 ds/min), 23,703,779 states left on queue. +Progress(33) at 2024-11-06 14:44:32: 705,581,782 states generated (29,526,889 s/min), 76,200,615 distinct states found (2,848,710 ds/min), 24,414,094 states left on queue. +Progress(33) at 2024-11-06 14:45:32: 735,069,836 states generated (29,488,054 s/min), 79,168,244 distinct states found (2,967,629 ds/min), 25,255,224 states left on queue. +Progress(33) at 2024-11-06 14:46:32: 764,659,188 states generated (29,589,352 s/min), 82,024,430 distinct states found (2,856,186 ds/min), 26,011,047 states left on queue. +Progress(33) at 2024-11-06 14:47:32: 794,276,423 states generated (29,617,235 s/min), 84,974,312 distinct states found (2,949,882 ds/min), 26,868,750 states left on queue. +Progress(33) at 2024-11-06 14:48:32: 823,875,831 states generated (29,599,408 s/min), 88,004,386 distinct states found (3,030,074 ds/min), 27,771,984 states left on queue. 
+Progress(33) at 2024-11-06 14:49:32: 853,138,894 states generated (29,263,063 s/min), 91,006,890 distinct states found (3,002,504 ds/min), 28,636,661 states left on queue. +Checkpointing of run states/24-11-06-14-20-25.868 +Checkpointing completed at (2024-11-06 14:50:32) +Progress(33) at 2024-11-06 14:50:32: 882,514,167 states generated (29,375,273 s/min), 94,011,000 distinct states found (3,004,110 ds/min), 29,534,516 states left on queue. +Progress(33) at 2024-11-06 14:51:32: 911,838,377 states generated (29,324,210 s/min), 97,108,937 distinct states found (3,097,937 ds/min), 30,498,587 states left on queue. +Progress(33) at 2024-11-06 14:52:32: 940,646,920 states generated (28,808,543 s/min), 100,248,865 distinct states found (3,139,928 ds/min), 31,472,191 states left on queue. +Progress(33) at 2024-11-06 14:53:32: 970,074,175 states generated (29,427,255 s/min), 103,170,815 distinct states found (2,921,950 ds/min), 32,265,691 states left on queue. +Progress(33) at 2024-11-06 14:54:32: 999,627,974 states generated (29,553,799 s/min), 106,004,823 distinct states found (2,834,008 ds/min), 33,009,618 states left on queue. +Progress(34) at 2024-11-06 14:55:32: 1,029,148,983 states generated (29,521,009 s/min), 108,740,783 distinct states found (2,735,960 ds/min), 33,616,222 states left on queue. +Progress(34) at 2024-11-06 14:56:32: 1,058,582,001 states generated (29,433,018 s/min), 111,612,965 distinct states found (2,872,182 ds/min), 34,375,212 states left on queue. +Progress(34) at 2024-11-06 14:57:32: 1,088,123,602 states generated (29,541,601 s/min), 114,464,196 distinct states found (2,851,231 ds/min), 35,116,195 states left on queue. +Progress(34) at 2024-11-06 14:58:32: 1,117,684,936 states generated (29,561,334 s/min), 117,252,198 distinct states found (2,788,002 ds/min), 35,817,205 states left on queue. +Progress(34) at 2024-11-06 14:59:32: 1,147,356,249 states generated (29,671,313 s/min), 120,014,476 distinct states found (2,762,278 ds/min), 36,517,255 states left on queue. +Progress(34) at 2024-11-06 15:00:32: 1,176,921,098 states generated (29,564,849 s/min), 122,859,312 distinct states found (2,844,836 ds/min), 37,291,096 states left on queue. +Progress(34) at 2024-11-06 15:01:32: 1,206,454,440 states generated (29,533,342 s/min), 125,830,942 distinct states found (2,971,630 ds/min), 38,147,762 states left on queue. +Progress(34) at 2024-11-06 15:02:32: 1,235,721,673 states generated (29,267,233 s/min), 128,869,493 distinct states found (3,038,551 ds/min), 39,035,481 states left on queue. +Progress(34) at 2024-11-06 15:03:32: 1,265,097,779 states generated (29,376,106 s/min), 131,669,552 distinct states found (2,800,059 ds/min), 39,746,864 states left on queue. +Progress(34) at 2024-11-06 15:04:32: 1,294,408,098 states generated (29,310,319 s/min), 134,604,630 distinct states found (2,935,078 ds/min), 40,584,235 states left on queue. +Progress(34) at 2024-11-06 15:05:32: 1,323,792,755 states generated (29,384,657 s/min), 137,579,390 distinct states found (2,974,760 ds/min), 41,446,478 states left on queue. +Progress(34) at 2024-11-06 15:06:32: 1,353,085,163 states generated (29,292,408 s/min), 140,575,723 distinct states found (2,996,333 ds/min), 42,309,510 states left on queue. +Progress(34) at 2024-11-06 15:07:32: 1,381,809,417 states generated (28,724,254 s/min), 143,655,566 distinct states found (3,079,843 ds/min), 43,220,682 states left on queue. 
+Progress(34) at 2024-11-06 15:08:32: 1,411,255,848 states generated (29,446,431 s/min), 146,482,192 distinct states found (2,826,626 ds/min), 43,944,938 states left on queue. +Progress(34) at 2024-11-06 15:09:32: 1,440,646,323 states generated (29,390,475 s/min), 149,419,989 distinct states found (2,937,797 ds/min), 44,763,293 states left on queue. +Progress(34) at 2024-11-06 15:10:32: 1,470,298,568 states generated (29,652,245 s/min), 152,041,419 distinct states found (2,621,430 ds/min), 45,311,911 states left on queue. +Progress(35) at 2024-11-06 15:11:32: 1,499,747,712 states generated (29,449,144 s/min), 154,696,867 distinct states found (2,655,448 ds/min), 45,842,895 states left on queue. +Progress(35) at 2024-11-06 15:12:32: 1,529,256,993 states generated (29,509,281 s/min), 157,493,365 distinct states found (2,796,498 ds/min), 46,535,472 states left on queue. +Progress(35) at 2024-11-06 15:13:32: 1,558,829,306 states generated (29,572,313 s/min), 160,256,575 distinct states found (2,763,210 ds/min), 47,212,471 states left on queue. +Progress(35) at 2024-11-06 15:14:32: 1,588,345,878 states generated (29,516,572 s/min), 163,002,602 distinct states found (2,746,027 ds/min), 47,862,117 states left on queue. +Progress(35) at 2024-11-06 15:15:32: 1,617,885,675 states generated (29,539,797 s/min), 165,699,121 distinct states found (2,696,519 ds/min), 48,472,896 states left on queue. +Progress(35) at 2024-11-06 15:16:32: 1,647,559,965 states generated (29,674,290 s/min), 168,343,286 distinct states found (2,644,165 ds/min), 49,065,377 states left on queue. +Progress(35) at 2024-11-06 15:17:32: 1,677,033,250 states generated (29,473,285 s/min), 171,134,409 distinct states found (2,791,123 ds/min), 49,823,330 states left on queue. +Progress(35) at 2024-11-06 15:18:32: 1,706,730,266 states generated (29,697,016 s/min), 173,860,974 distinct states found (2,726,565 ds/min), 50,493,221 states left on queue. +Error: Invariant LogSafety is violated. 
+Error: The behavior up to this point is: +State 1: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "campaign", + votes |-> <<>>, + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] @@ + p2 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "campaign", + votes |-> <<>>, + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] ) +/\ acc_state = ( a1 :> + [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a2 :> + [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a3 :> + [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] ) +/\ committed = {} + +State 2: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "campaign", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] @@ + p2 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "campaign", + votes |-> <<>>, + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] ) +/\ acc_state = ( a1 :> + [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a2 :> + [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a3 :> + [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] ) +/\ committed = {} + +State 3: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "campaign", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] @@ + p2 :> + [ term |-> 2, + wal |-> <<>>, + state |-> "campaign", + votes |-> <<>>, + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] ) +/\ acc_state = ( a1 :> + [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a2 :> + [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a3 :> + [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] ) +/\ committed = {} + +State 4: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "campaign", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a2 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] @@ + p2 :> + [ term |-> 2, + wal |-> <<>>, + state |-> "campaign", + votes |-> <<>>, + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] ) +/\ acc_state = ( a1 :> + [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a2 :> + [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a3 :> + [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] ) +/\ committed = {} + +State 5: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "leader", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a2 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>, + nextSendLsn |-> <<>> ] @@ + p2 :> + [ term |-> 2, + wal |-> <<>>, + state |-> "campaign", + votes |-> <<>>, + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] ) +/\ acc_state = ( a1 :> + [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a2 :> + [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a3 :> + [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] ) +/\ committed = {} + +State 6: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<>>, + state |-> 
"leader", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a2 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>, + nextSendLsn |-> <<>> ] @@ + p2 :> + [ term |-> 2, + wal |-> <<>>, + state |-> "campaign", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] ) +/\ acc_state = ( a1 :> + [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a2 :> + [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a3 :> + [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] ) +/\ committed = {} + +State 7: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "leader", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a2 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>, + nextSendLsn |-> (a2 :> 1) ] @@ + p2 :> + [ term |-> 2, + wal |-> <<>>, + state |-> "campaign", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] ) +/\ acc_state = ( a1 :> + [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a2 :> + [ term |-> 1, + wal |-> <<>>, + termHistory |-> + <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@ + a3 :> + [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] ) +/\ committed = {} + +State 8: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "leader", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a2 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>, + nextSendLsn |-> (a2 :> 1) ] @@ + p2 :> + [ term |-> 2, + wal |-> <<>>, + state |-> "campaign", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a3 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] ) +/\ acc_state = ( a1 :> + [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a2 :> + [ term |-> 1, + wal |-> <<>>, + termHistory |-> + <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@ + a3 :> + [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] ) +/\ committed = {} + +State 9: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "leader", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a2 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>, + nextSendLsn |-> (a2 :> 1) ] @@ + p2 :> + [ term |-> 2, + wal |-> <<>>, + state |-> "leader", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a3 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>, + nextSendLsn |-> <<>> ] ) +/\ acc_state = ( a1 :> + [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@ + a2 :> + [ term |-> 1, + wal |-> <<>>, + termHistory |-> + <<[term |-> 0, lsn |-> 1], [term |-> 
1, lsn |-> 1]>> ] @@ + a3 :> + [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] ) +/\ committed = {} + +State 10: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "leader", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a2 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>, + nextSendLsn |-> (a2 :> 1) ] @@ + p2 :> + [ term |-> 2, + wal |-> <<>>, + state |-> "leader", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a3 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>, + nextSendLsn |-> (a1 :> 1) ] ) +/\ acc_state = ( a1 :> + [ term |-> 2, + wal |-> <<>>, + termHistory |-> + <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@ + a2 :> + [ term |-> 1, + wal |-> <<>>, + termHistory |-> + <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@ + a3 :> + [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] ) +/\ committed = {} + +State 11: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "leader", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a2 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>, + nextSendLsn |-> (a2 :> 1) ] @@ + p2 :> + [ term |-> 3, + wal |-> <<>>, + state |-> "campaign", + votes |-> <<>>, + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] ) +/\ acc_state = ( a1 :> + [ term |-> 2, + wal |-> <<>>, + termHistory |-> + <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@ + a2 :> + [ term |-> 1, + wal |-> <<>>, + termHistory |-> + <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@ + a3 :> + [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] ) +/\ committed = {} + +State 12: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<>>, + state |-> "leader", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a2 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>, + nextSendLsn |-> (a2 :> 1) ] @@ + p2 :> + [ term |-> 3, + wal |-> <<>>, + state |-> "campaign", + votes |-> + ( a3 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] ) +/\ acc_state = ( a1 :> + [ term |-> 2, + wal |-> <<>>, + termHistory |-> + <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@ + a2 :> + [ term |-> 1, + wal |-> <<>>, + termHistory |-> + <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@ + a3 :> + [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] ) +/\ committed = {} + +State 13: +/\ prop_state = ( p1 :> + [ term |-> 1, + wal |-> <<1>>, + state |-> "leader", + votes |-> + ( a1 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] @@ + a2 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>, + nextSendLsn |-> (a2 :> 1) ] @@ + p2 :> + [ term |-> 3, + wal |-> <<>>, + state |-> "campaign", + votes |-> + ( a3 :> + [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>, + flushLsn |-> 1 ] ), + termHistory |-> <<>>, + nextSendLsn |-> <<>> ] ) 
+/\ acc_state = ( a1 :>
+     [ term |-> 2,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 1,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@
+   a3 :>
+     [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )
+/\ committed = {}
+
+State 14:
+/\ prop_state = ( p1 :>
+     [ term |-> 1,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a1 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] @@
+             a2 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+       nextSendLsn |-> (a2 :> 1) ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 2,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 1,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@
+   a3 :>
+     [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )
+/\ committed = {}
+
+State 15:
+/\ prop_state = ( p1 :>
+     [ term |-> 1,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a1 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] @@
+             a2 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+       nextSendLsn |-> (a2 :> 2) ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 2,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 1,
+       wal |-> <<1>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@
+   a3 :>
+     [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )
+/\ committed = {}
+
+State 16:
+/\ prop_state = ( p1 :>
+     [ term |-> 1,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a1 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] @@
+             a2 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+       nextSendLsn |-> (a2 :> 3) ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 2,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 1,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@
+   a3 :>
+     [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )
+/\ committed = {}
+
+State 17:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |-> <<>>,
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 2,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 1,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@
+   a3 :>
+     [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )
+/\ committed = {}
+
+State 18:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 1,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@
+   a3 :>
+     [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )
+/\ committed = {}
+
+State 19:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@
+   a3 :>
+     [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )
+/\ committed = {}
+
+State 20:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> <<>> ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@
+   a3 :>
+     [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )
+/\ committed = {}
+
+State 21:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> (a2 :> 3) ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] @@
+   a3 :>
+     [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )
+/\ committed = {}
+
+State 22:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> (a2 :> 3 @@ a3 :> 1) ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] @@
+   a3 :>
+     [ term |-> 3,
+       wal |-> <<>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] )
+/\ committed = {}
+
+State 23:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] @@
+   a3 :>
+     [ term |-> 3,
+       wal |-> <<1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] )
+/\ committed = {}
+
+State 24:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] @@
+   a3 :>
+     [ term |-> 3,
+       wal |-> <<1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] )
+/\ committed = {[term |-> 1, lsn |-> 1]}
+
+State 25:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       state |-> "campaign",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] @@
+             a3 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 2 ] ),
+       termHistory |-> <<>>,
+       nextSendLsn |-> <<>> ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] @@
+   a3 :>
+     [ term |-> 4,
+       wal |-> <<1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] )
+/\ committed = {[term |-> 1, lsn |-> 1]}
+
+State 26:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       state |-> "leader",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] @@
+             a3 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 2 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >>,
+       nextSendLsn |-> <<>> ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] @@
+   a3 :>
+     [ term |-> 4,
+       wal |-> <<1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] )
+/\ committed = {[term |-> 1, lsn |-> 1]}
+
+State 27:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       state |-> "leader",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] @@
+             a3 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 2 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >>,
+       nextSendLsn |-> (a3 :> 1) ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] @@
+   a3 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >> ] )
+/\ committed = {[term |-> 1, lsn |-> 1]}
+
+State 28:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<4>>,
+       state |-> "leader",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] @@
+             a3 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 2 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >>,
+       nextSendLsn |-> (a3 :> 1) ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] @@
+   a3 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >> ] )
+/\ committed = {[term |-> 1, lsn |-> 1]}
+
+State 29:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<4>>,
+       state |-> "leader",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] @@
+             a3 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 2 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >>,
+       nextSendLsn |-> (a3 :> 2) ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] @@
+   a3 :>
+     [ term |-> 4,
+       wal |-> <<4>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >> ] )
+/\ committed = {[term |-> 1, lsn |-> 1]}
+
+State 30:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<4>>,
+       state |-> "leader",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] @@
+             a3 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 2 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >>,
+       nextSendLsn |-> (a1 :> 1 @@ a3 :> 2) ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] @@
+   a3 :>
+     [ term |-> 4,
+       wal |-> <<4>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >> ] )
+/\ committed = {[term |-> 1, lsn |-> 1]}
+
+State 31:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<4>>,
+       state |-> "leader",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] @@
+             a3 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 2 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >>,
+       nextSendLsn |-> (a1 :> 2 @@ a3 :> 2) ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<4>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] @@
+   a3 :>
+     [ term |-> 4,
+       wal |-> <<4>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >> ] )
+/\ committed = {[term |-> 1, lsn |-> 1]}
+
+State 32:
+/\ prop_state = ( p1 :>
+     [ term |-> 4,
+       wal |-> <<4>>,
+       state |-> "leader",
+       votes |->
+           ( a1 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] @@
+             a3 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 2 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >>,
+       nextSendLsn |-> (a1 :> 2 @@ a3 :> 2) ] @@
+   p2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       state |-> "leader",
+       votes |->
+           ( a2 :>
+                 [ termHistory |->
+                       <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,
+                   flushLsn |-> 3 ] @@
+             a3 :>
+                 [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,
+                   flushLsn |-> 1 ] ),
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >>,
+       nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )
+/\ acc_state = ( a1 :>
+     [ term |-> 4,
+       wal |-> <<4>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >> ] @@
+   a2 :>
+     [ term |-> 3,
+       wal |-> <<1, 1>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 1, lsn |-> 1],
+              [term |-> 3, lsn |-> 3] >> ] @@
+   a3 :>
+     [ term |-> 4,
+       wal |-> <<4>>,
+       termHistory |->
+           << [term |-> 0, lsn |-> 1],
+              [term |-> 2, lsn |-> 1],
+              [term |-> 4, lsn |-> 1] >> ] )
+/\ committed = {[term |-> 1, lsn |-> 1], [term |-> 4, lsn |-> 1]}
+
+1712918117 states generated, 174460942 distinct states found, 50658619 states left on queue.
+The depth of the complete state graph search is 35.
+Finished in 58min 19s at (2024-11-06 15:18:45)
+Trace exploration spec path: ./MCProposerAcceptorStatic_TTrace_1730902825.tla
diff --git a/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t4_l4.cfg-2024-11-06--15-30-45.log b/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t4_l4.cfg-2024-11-06--15-30-45.log
new file mode 100644
index 000000000000..8248240dedfb
--- /dev/null
+++ b/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t4_l4.cfg-2024-11-06--15-30-45.log
@@ -0,0 +1,1374 @@
+git revision: 4f1ee6331
+Platform: Linux neon-dev-arm64-1 6.8.0-48-generic #48-Ubuntu SMP PREEMPT_DYNAMIC Fri Sep 27 14:35:45 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux
+CPU Info Linux: Neoverse-N1
+CPU Cores Linux: 80
+CPU Info Mac:
+CPU Cores Mac:
+Spec: MCProposerAcceptorStatic.tla
+Config: models/MCProposerAcceptorStatic_p2_a3_t4_l4.cfg
+----
+CONSTANTS
+NULL = NULL
+proposers = {p1, p2}
+acceptors = {a1, a2, a3}
+max_term = 4
+max_entries = 4
+SPECIFICATION Spec
+CONSTRAINT StateConstraint
+INVARIANT
+TypeOk
+ElectionSafety
+LogIsMonotonic
+LogSafety
+SYMMETRY ProposerAcceptorSymmetry
+CHECK_DEADLOCK FALSE
+ALIAS Alias
+
+----
+
+TLC2 Version 2.20 of Day Month 20?? (rev: f68cb71)
+Running breadth-first search Model-Checking with fp 84 and seed -1069171980999686913 with 80 workers on 80 cores with 54613MB heap and 61440MB offheap memory [pid: 62544] (Linux 6.8.0-48-generic aarch64, Ubuntu 21.0.4 x86_64, OffHeapDiskFPSet, DiskStateQueue).
+Parsing file /home/arseny/neon/safekeeper/spec/MCProposerAcceptorStatic.tla
+Parsing file /tmp/tlc-6542850091824737097/TLC.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLC.tla)
+Parsing file /home/arseny/neon/safekeeper/spec/ProposerAcceptorStatic.tla
+Parsing file /tmp/tlc-6542850091824737097/_TLCTrace.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla)
+Parsing file /tmp/tlc-6542850091824737097/Integers.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Integers.tla)
+Parsing file /tmp/tlc-6542850091824737097/Sequences.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla)
+Parsing file /tmp/tlc-6542850091824737097/FiniteSets.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla)
+Parsing file /tmp/tlc-6542850091824737097/Naturals.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla)
+Parsing file /tmp/tlc-6542850091824737097/TLCExt.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla)
+Semantic processing of module Naturals
+Semantic processing of module Sequences
+Semantic processing of module FiniteSets
+Semantic processing of module TLC
+Semantic processing of module Integers
+Semantic processing of module ProposerAcceptorStatic
+Semantic processing of module TLCExt
+Semantic processing of module _TLCTrace
+Semantic processing of module MCProposerAcceptorStatic
+Starting... (2024-11-06 15:30:45)
+Computing initial states...
+Finished computing initial states: 1 distinct state generated at 2024-11-06 15:30:48.
+Progress(20) at 2024-11-06 15:30:51: 956,386 states generated (956,386 s/min), 134,121 distinct states found (134,121 ds/min), 57,996 states left on queue.
+Progress(27) at 2024-11-06 15:31:51: 30,048,294 states generated (29,091,908 s/min), 3,778,849 distinct states found (3,644,728 ds/min), 1,463,715 states left on queue.
+Progress(28) at 2024-11-06 15:32:51: 59,092,248 states generated (29,043,954 s/min), 7,282,332 distinct states found (3,503,483 ds/min), 2,750,944 states left on queue.
+Progress(29) at 2024-11-06 15:33:51: 88,333,136 states generated (29,240,888 s/min), 10,694,325 distinct states found (3,411,993 ds/min), 3,955,744 states left on queue.
+Progress(29) at 2024-11-06 15:34:51: 117,708,994 states generated (29,375,858 s/min), 14,000,885 distinct states found (3,306,560 ds/min), 5,067,487 states left on queue.
+Progress(30) at 2024-11-06 15:35:51: 146,847,667 states generated (29,138,673 s/min), 17,407,824 distinct states found (3,406,939 ds/min), 6,258,337 states left on queue.
+Progress(30) at 2024-11-06 15:36:51: 176,211,801 states generated (29,364,134 s/min), 20,626,933 distinct states found (3,219,109 ds/min), 7,302,661 states left on queue.
+Progress(31) at 2024-11-06 15:37:51: 205,665,438 states generated (29,453,637 s/min), 23,877,622 distinct states found (3,250,689 ds/min), 8,361,004 states left on queue.
+Progress(31) at 2024-11-06 15:38:51: 234,757,357 states generated (29,091,919 s/min), 27,246,813 distinct states found (3,369,191 ds/min), 9,511,916 states left on queue.
+Progress(31) at 2024-11-06 15:39:51: 264,154,436 states generated (29,397,079 s/min), 30,383,069 distinct states found (3,136,256 ds/min), 10,494,238 states left on queue.
+Progress(31) at 2024-11-06 15:40:51: 293,638,121 states generated (29,483,685 s/min), 33,498,433 distinct states found (3,115,364 ds/min), 11,429,812 states left on queue.
+Progress(32) at 2024-11-06 15:41:51: 323,039,991 states generated (29,401,870 s/min), 36,709,338 distinct states found (3,210,905 ds/min), 12,463,752 states left on queue.
+Progress(32) at 2024-11-06 15:42:51: 352,081,458 states generated (29,041,467 s/min), 39,979,938 distinct states found (3,270,600 ds/min), 13,531,461 states left on queue.
+Progress(32) at 2024-11-06 15:43:51: 381,472,323 states generated (29,390,865 s/min), 43,147,359 distinct states found (3,167,421 ds/min), 14,513,444 states left on queue.
+Progress(32) at 2024-11-06 15:44:51: 410,911,764 states generated (29,439,441 s/min), 46,200,793 distinct states found (3,053,434 ds/min), 15,418,951 states left on queue.
+Progress(32) at 2024-11-06 15:45:51: 440,514,627 states generated (29,602,863 s/min), 49,210,279 distinct states found (3,009,486 ds/min), 16,263,879 states left on queue.
+Progress(33) at 2024-11-06 15:46:51: 470,070,180 states generated (29,555,553 s/min), 52,317,535 distinct states found (3,107,256 ds/min), 17,200,875 states left on queue.
+Progress(33) at 2024-11-06 15:47:51: 499,387,268 states generated (29,317,088 s/min), 55,489,376 distinct states found (3,171,841 ds/min), 18,196,719 states left on queue.
+Progress(33) at 2024-11-06 15:48:51: 528,308,354 states generated (28,921,086 s/min), 58,716,400 distinct states found (3,227,024 ds/min), 19,225,822 states left on queue.
+Progress(33) at 2024-11-06 15:49:51: 557,626,508 states generated (29,318,154 s/min), 61,861,039 distinct states found (3,144,639 ds/min), 20,172,391 states left on queue.
+Progress(33) at 2024-11-06 15:50:51: 587,011,551 states generated (29,385,043 s/min), 64,911,520 distinct states found (3,050,481 ds/min), 21,068,246 states left on queue.
+Progress(33) at 2024-11-06 15:51:51: 616,469,665 states generated (29,458,114 s/min), 67,862,377 distinct states found (2,950,857 ds/min), 21,888,495 states left on queue.
+Progress(33) at 2024-11-06 15:52:51: 646,037,901 states generated (29,568,236 s/min), 70,774,601 distinct states found (2,912,224 ds/min), 22,642,487 states left on queue.
+Progress(33) at 2024-11-06 15:53:51: 675,679,292 states generated (29,641,391 s/min), 73,753,124 distinct states found (2,978,523 ds/min), 23,459,982 states left on queue.
+Progress(34) at 2024-11-06 15:54:51: 705,213,119 states generated (29,533,827 s/min), 76,751,356 distinct states found (2,998,232 ds/min), 24,319,315 states left on queue.
+Progress(34) at 2024-11-06 15:55:51: 734,548,637 states generated (29,335,518 s/min), 79,865,504 distinct states found (3,114,148 ds/min), 25,270,867 states left on queue.
+Progress(34) at 2024-11-06 15:56:51: 763,724,351 states generated (29,175,714 s/min), 82,969,406 distinct states found (3,103,902 ds/min), 26,203,099 states left on queue.
+Progress(34) at 2024-11-06 15:57:51: 792,795,916 states generated (29,071,565 s/min), 86,092,913 distinct states found (3,123,507 ds/min), 27,124,641 states left on queue.
+Progress(34) at 2024-11-06 15:58:51: 822,084,221 states generated (29,288,305 s/min), 89,196,548 distinct states found (3,103,635 ds/min), 28,028,058 states left on queue.
+Progress(34) at 2024-11-06 15:59:51: 851,516,510 states generated (29,432,289 s/min), 92,135,078 distinct states found (2,938,530 ds/min), 28,822,750 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 16:00:51)
+Progress(34) at 2024-11-06 16:00:51: 880,891,436 states generated (29,374,926 s/min), 95,133,622 distinct states found (2,998,544 ds/min), 29,669,470 states left on queue.
+Progress(34) at 2024-11-06 16:01:51: 910,262,536 states generated (29,371,100 s/min), 98,019,631 distinct states found (2,886,009 ds/min), 30,433,293 states left on queue.
+Progress(34) at 2024-11-06 16:02:51: 939,689,255 states generated (29,426,719 s/min), 100,814,884 distinct states found (2,795,253 ds/min), 31,083,132 states left on queue.
+Progress(34) at 2024-11-06 16:03:51: 969,299,651 states generated (29,610,396 s/min), 103,664,772 distinct states found (2,849,888 ds/min), 31,821,093 states left on queue.
+Progress(34) at 2024-11-06 16:04:51: 999,051,292 states generated (29,751,641 s/min), 106,544,287 distinct states found (2,879,515 ds/min), 32,536,946 states left on queue.
+Progress(35) at 2024-11-06 16:05:51: 1,028,690,576 states generated (29,639,284 s/min), 109,444,362 distinct states found (2,900,075 ds/min), 33,326,316 states left on queue.
+Progress(35) at 2024-11-06 16:06:51: 1,058,155,400 states generated (29,464,824 s/min), 112,439,937 distinct states found (2,995,575 ds/min), 34,167,604 states left on queue.
+Progress(35) at 2024-11-06 16:07:51: 1,087,496,744 states generated (29,341,344 s/min), 115,461,649 distinct states found (3,021,712 ds/min), 35,032,974 states left on queue.
+Progress(35) at 2024-11-06 16:08:51: 1,116,663,767 states generated (29,167,023 s/min), 118,482,838 distinct states found (3,021,189 ds/min), 35,902,651 states left on queue.
+Progress(35) at 2024-11-06 16:09:51: 1,145,439,918 states generated (28,776,151 s/min), 121,562,159 distinct states found (3,079,321 ds/min), 36,785,088 states left on queue.
+Progress(35) at 2024-11-06 16:10:51: 1,174,812,354 states generated (29,372,436 s/min), 124,511,721 distinct states found (2,949,562 ds/min), 37,555,204 states left on queue.
+Progress(35) at 2024-11-06 16:11:51: 1,204,150,178 states generated (29,337,824 s/min), 127,579,155 distinct states found (3,067,434 ds/min), 38,425,790 states left on queue.
+Progress(35) at 2024-11-06 16:12:51: 1,233,620,353 states generated (29,470,175 s/min), 130,490,427 distinct states found (2,911,272 ds/min), 39,188,412 states left on queue.
+Progress(35) at 2024-11-06 16:13:51: 1,263,022,331 states generated (29,401,978 s/min), 133,317,160 distinct states found (2,826,733 ds/min), 39,893,070 states left on queue.
+Progress(35) at 2024-11-06 16:14:51: 1,292,411,979 states generated (29,389,648 s/min), 136,229,817 distinct states found (2,912,657 ds/min), 40,666,029 states left on queue.
+Progress(35) at 2024-11-06 16:15:51: 1,321,695,856 states generated (29,283,877 s/min), 139,081,910 distinct states found (2,852,093 ds/min), 41,389,715 states left on queue.
+Progress(35) at 2024-11-06 16:16:51: 1,351,045,560 states generated (29,349,704 s/min), 141,811,662 distinct states found (2,729,752 ds/min), 41,999,267 states left on queue.
+Progress(35) at 2024-11-06 16:17:51: 1,380,677,436 states generated (29,631,876 s/min), 144,516,072 distinct states found (2,704,410 ds/min), 42,579,779 states left on queue.
+Progress(35) at 2024-11-06 16:18:51: 1,410,332,660 states generated (29,655,224 s/min), 147,269,848 distinct states found (2,753,776 ds/min), 43,232,732 states left on queue.
+Progress(35) at 2024-11-06 16:19:51: 1,440,071,594 states generated (29,738,934 s/min), 150,116,683 distinct states found (2,846,835 ds/min), 43,917,859 states left on queue.
+Progress(35) at 2024-11-06 16:20:51: 1,469,737,942 states generated (29,666,348 s/min), 152,881,605 distinct states found (2,764,922 ds/min), 44,594,909 states left on queue.
+Progress(36) at 2024-11-06 16:21:51: 1,499,124,482 states generated (29,386,540 s/min), 155,722,313 distinct states found (2,840,708 ds/min), 45,306,186 states left on queue.
+Progress(36) at 2024-11-06 16:22:51: 1,528,616,635 states generated (29,492,153 s/min), 158,643,911 distinct states found (2,921,598 ds/min), 46,098,600 states left on queue.
+Progress(36) at 2024-11-06 16:23:51: 1,557,820,328 states generated (29,203,693 s/min), 161,651,516 distinct states found (3,007,605 ds/min), 46,958,572 states left on queue.
+Progress(36) at 2024-11-06 16:24:51: 1,587,341,565 states generated (29,521,237 s/min), 164,469,424 distinct states found (2,817,908 ds/min), 47,648,932 states left on queue.
+Progress(36) at 2024-11-06 16:25:51: 1,616,246,807 states generated (28,905,242 s/min), 167,471,199 distinct states found (3,001,775 ds/min), 48,496,844 states left on queue.
+Progress(36) at 2024-11-06 16:26:51: 1,645,107,613 states generated (28,860,806 s/min), 170,454,103 distinct states found (2,982,904 ds/min), 49,283,244 states left on queue.
+Progress(36) at 2024-11-06 16:27:51: 1,674,492,314 states generated (29,384,701 s/min), 173,343,045 distinct states found (2,888,942 ds/min), 50,006,895 states left on queue.
+Progress(36) at 2024-11-06 16:28:51: 1,703,875,027 states generated (29,382,713 s/min), 176,157,623 distinct states found (2,814,578 ds/min), 50,662,128 states left on queue.
+Progress(36) at 2024-11-06 16:29:51: 1,733,099,131 states generated (29,224,104 s/min), 179,186,519 distinct states found (3,028,896 ds/min), 51,498,029 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 16:30:52)
+Progress(36) at 2024-11-06 16:30:52: 1,762,724,622 states generated (29,625,491 s/min), 181,958,595 distinct states found (2,772,076 ds/min), 52,142,450 states left on queue.
+Progress(36) at 2024-11-06 16:31:52: 1,792,118,288 states generated (29,393,666 s/min), 184,725,090 distinct states found (2,766,495 ds/min), 52,785,705 states left on queue.
+Progress(36) at 2024-11-06 16:32:52: 1,821,258,069 states generated (29,139,781 s/min), 187,681,452 distinct states found (2,956,362 ds/min), 53,592,610 states left on queue.
+Progress(36) at 2024-11-06 16:33:52: 1,850,729,054 states generated (29,470,985 s/min), 190,451,722 distinct states found (2,770,270 ds/min), 54,239,919 states left on queue.
+Progress(36) at 2024-11-06 16:34:52: 1,879,860,913 states generated (29,131,859 s/min), 193,207,770 distinct states found (2,756,048 ds/min), 54,886,748 states left on queue.
+Progress(36) at 2024-11-06 16:35:52: 1,909,200,565 states generated (29,339,652 s/min), 195,832,123 distinct states found (2,624,353 ds/min), 55,404,535 states left on queue.
+Progress(36) at 2024-11-06 16:36:52: 1,938,403,873 states generated (29,203,308 s/min), 198,569,916 distinct states found (2,737,793 ds/min), 55,993,675 states left on queue.
+Progress(36) at 2024-11-06 16:37:52: 1,968,097,695 states generated (29,693,822 s/min), 201,148,799 distinct states found (2,578,883 ds/min), 56,501,179 states left on queue.
+Progress(36) at 2024-11-06 16:38:52: 1,997,628,304 states generated (29,530,609 s/min), 203,860,765 distinct states found (2,711,966 ds/min), 57,133,283 states left on queue.
+Progress(36) at 2024-11-06 16:39:52: 2,027,338,755 states generated (29,710,451 s/min), 206,496,491 distinct states found (2,635,726 ds/min), 57,649,914 states left on queue.
+Progress(36) at 2024-11-06 16:40:52: 2,057,072,538 states generated (29,733,783 s/min), 209,189,488 distinct states found (2,692,997 ds/min), 58,229,449 states left on queue.
+Progress(36) at 2024-11-06 16:41:52: 2,086,549,250 states generated (29,476,712 s/min), 211,909,869 distinct states found (2,720,381 ds/min), 58,875,611 states left on queue.
+Progress(37) at 2024-11-06 16:42:52: 2,115,953,926 states generated (29,404,676 s/min), 214,630,876 distinct states found (2,721,007 ds/min), 59,494,220 states left on queue.
+Progress(37) at 2024-11-06 16:43:52: 2,145,423,196 states generated (29,469,270 s/min), 217,412,888 distinct states found (2,782,012 ds/min), 60,176,423 states left on queue.
+Progress(37) at 2024-11-06 16:44:52: 2,174,796,796 states generated (29,373,600 s/min), 220,316,140 distinct states found (2,903,252 ds/min), 60,925,815 states left on queue.
+Progress(37) at 2024-11-06 16:45:52: 2,203,907,384 states generated (29,110,588 s/min), 223,255,125 distinct states found (2,938,985 ds/min), 61,739,564 states left on queue.
+Progress(37) at 2024-11-06 16:46:52: 2,233,378,272 states generated (29,470,888 s/min), 225,995,858 distinct states found (2,740,733 ds/min), 62,364,627 states left on queue.
+Progress(37) at 2024-11-06 16:47:52: 2,262,648,334 states generated (29,270,062 s/min), 228,738,653 distinct states found (2,742,795 ds/min), 63,003,155 states left on queue.
+Progress(37) at 2024-11-06 16:48:52: 2,291,309,648 states generated (28,661,314 s/min), 231,720,498 distinct states found (2,981,845 ds/min), 63,816,162 states left on queue.
+Progress(37) at 2024-11-06 16:49:52: 2,320,153,384 states generated (28,843,736 s/min), 234,599,475 distinct states found (2,878,977 ds/min), 64,513,886 states left on queue.
+Progress(37) at 2024-11-06 16:50:52: 2,349,538,907 states generated (29,385,523 s/min), 237,330,640 distinct states found (2,731,165 ds/min), 65,105,576 states left on queue.
+Progress(37) at 2024-11-06 16:51:52: 2,379,015,082 states generated (29,476,175 s/min), 240,064,625 distinct states found (2,733,985 ds/min), 65,704,108 states left on queue.
+Progress(37) at 2024-11-06 16:52:52: 2,408,376,582 states generated (29,361,500 s/min), 242,869,889 distinct states found (2,805,264 ds/min), 66,339,299 states left on queue.
+Progress(37) at 2024-11-06 16:53:52: 2,437,554,516 states generated (29,177,934 s/min), 245,844,106 distinct states found (2,974,217 ds/min), 67,125,834 states left on queue.
+Progress(37) at 2024-11-06 16:54:52: 2,466,925,193 states generated (29,370,677 s/min), 248,540,587 distinct states found (2,696,481 ds/min), 67,707,623 states left on queue.
+Progress(37) at 2024-11-06 16:55:52: 2,496,386,977 states generated (29,461,784 s/min), 251,318,893 distinct states found (2,778,306 ds/min), 68,345,796 states left on queue.
+Progress(37) at 2024-11-06 16:56:52: 2,525,837,965 states generated (29,450,988 s/min), 253,918,986 distinct states found (2,600,093 ds/min), 68,851,521 states left on queue.
+Progress(37) at 2024-11-06 16:57:52: 2,555,073,687 states generated (29,235,722 s/min), 256,806,753 distinct states found (2,887,767 ds/min), 69,596,597 states left on queue.
+Progress(37) at 2024-11-06 16:58:52: 2,584,381,294 states generated (29,307,607 s/min), 259,714,054 distinct states found (2,907,301 ds/min), 70,335,539 states left on queue.
+Progress(37) at 2024-11-06 16:59:52: 2,613,557,081 states generated (29,175,787 s/min), 262,407,462 distinct states found (2,693,408 ds/min), 70,920,265 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 17:00:53)
+Progress(37) at 2024-11-06 17:00:53: 2,643,168,141 states generated (29,611,060 s/min), 264,973,171 distinct states found (2,565,709 ds/min), 71,384,749 states left on queue.
+Progress(37) at 2024-11-06 17:01:53: 2,672,453,868 states generated (29,285,727 s/min), 267,551,971 distinct states found (2,578,800 ds/min), 71,854,220 states left on queue.
+Progress(37) at 2024-11-06 17:02:53: 2,701,696,399 states generated (29,242,531 s/min), 270,233,135 distinct states found (2,681,164 ds/min), 72,406,567 states left on queue.
+Progress(37) at 2024-11-06 17:03:53: 2,731,216,488 states generated (29,520,089 s/min), 272,711,390 distinct states found (2,478,255 ds/min), 72,805,269 states left on queue.
+Progress(37) at 2024-11-06 17:04:53: 2,760,788,758 states generated (29,572,270 s/min), 275,307,217 distinct states found (2,595,827 ds/min), 73,313,123 states left on queue.
+Progress(37) at 2024-11-06 17:05:53: 2,790,339,552 states generated (29,550,794 s/min), 277,881,113 distinct states found (2,573,896 ds/min), 73,833,900 states left on queue.
+Progress(37) at 2024-11-06 17:06:53: 2,820,046,206 states generated (29,706,654 s/min), 280,371,086 distinct states found (2,489,973 ds/min), 74,231,258 states left on queue.
+Progress(37) at 2024-11-06 17:07:53: 2,849,787,753 states generated (29,741,547 s/min), 283,097,131 distinct states found (2,726,045 ds/min), 74,814,735 states left on queue.
+Progress(37) at 2024-11-06 17:08:53: 2,879,520,949 states generated (29,733,196 s/min), 285,608,053 distinct states found (2,510,922 ds/min), 75,293,894 states left on queue.
+Progress(37) at 2024-11-06 17:09:53: 2,908,889,760 states generated (29,368,811 s/min), 288,274,872 distinct states found (2,666,819 ds/min), 75,880,480 states left on queue.
+Progress(38) at 2024-11-06 17:10:53: 2,938,412,523 states generated (29,522,763 s/min), 290,877,598 distinct states found (2,602,726 ds/min), 76,391,156 states left on queue.
+Progress(38) at 2024-11-06 17:11:53: 2,967,963,455 states generated (29,550,932 s/min), 293,492,146 distinct states found (2,614,548 ds/min), 76,932,124 states left on queue.
+Progress(38) at 2024-11-06 17:12:53: 2,997,327,370 states generated (29,363,915 s/min), 296,353,306 distinct states found (2,861,160 ds/min), 77,659,606 states left on queue.
+Progress(38) at 2024-11-06 17:13:53: 3,026,713,138 states generated (29,385,768 s/min), 299,173,963 distinct states found (2,820,657 ds/min), 78,342,645 states left on queue.
+Progress(38) at 2024-11-06 17:14:53: 3,055,986,492 states generated (29,273,354 s/min), 302,024,049 distinct states found (2,850,086 ds/min), 79,071,501 states left on queue.
+Progress(38) at 2024-11-06 17:15:53: 3,085,491,974 states generated (29,505,482 s/min), 304,668,970 distinct states found (2,644,921 ds/min), 79,608,084 states left on queue.
+Progress(38) at 2024-11-06 17:16:53: 3,114,898,266 states generated (29,406,292 s/min), 307,272,526 distinct states found (2,603,556 ds/min), 80,132,575 states left on queue.
+Progress(38) at 2024-11-06 17:17:53: 3,144,023,490 states generated (29,125,224 s/min), 310,022,073 distinct states found (2,749,547 ds/min), 80,777,238 states left on queue.
+Progress(38) at 2024-11-06 17:18:53: 3,172,762,795 states generated (28,739,305 s/min), 312,891,905 distinct states found (2,869,832 ds/min), 81,497,739 states left on queue.
+Progress(38) at 2024-11-06 17:19:53: 3,201,314,425 states generated (28,551,630 s/min), 315,766,566 distinct states found (2,874,661 ds/min), 82,171,729 states left on queue.
+Progress(38) at 2024-11-06 17:20:53: 3,230,713,777 states generated (29,399,352 s/min), 318,365,612 distinct states found (2,599,046 ds/min), 82,638,018 states left on queue.
+Progress(38) at 2024-11-06 17:21:53: 3,260,188,634 states generated (29,474,857 s/min), 321,040,810 distinct states found (2,675,198 ds/min), 83,185,708 states left on queue.
+Progress(38) at 2024-11-06 17:22:53: 3,289,654,456 states generated (29,465,822 s/min), 323,660,313 distinct states found (2,619,503 ds/min), 83,689,075 states left on queue.
+Progress(38) at 2024-11-06 17:23:53: 3,319,003,677 states generated (29,349,221 s/min), 326,391,347 distinct states found (2,731,034 ds/min), 84,261,368 states left on queue.
+Progress(38) at 2024-11-06 17:24:53: 3,348,330,685 states generated (29,327,008 s/min), 329,204,934 distinct states found (2,813,587 ds/min), 84,925,046 states left on queue.
+Progress(38) at 2024-11-06 17:25:53: 3,377,572,946 states generated (29,242,261 s/min), 331,997,887 distinct states found (2,792,953 ds/min), 85,533,473 states left on queue.
+Progress(38) at 2024-11-06 17:26:53: 3,406,881,714 states generated (29,308,768 s/min), 334,599,745 distinct states found (2,601,858 ds/min), 86,047,276 states left on queue.
+Progress(38) at 2024-11-06 17:27:53: 3,436,375,389 states generated (29,493,675 s/min), 337,261,572 distinct states found (2,661,827 ds/min), 86,591,357 states left on queue.
+Progress(38) at 2024-11-06 17:28:53: 3,465,811,732 states generated (29,436,343 s/min), 339,829,613 distinct states found (2,568,041 ds/min), 87,057,550 states left on queue.
+Progress(38) at 2024-11-06 17:29:53: 3,495,144,983 states generated (29,333,251 s/min), 342,566,275 distinct states found (2,736,662 ds/min), 87,671,131 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 17:30:53)
+Progress(38) at 2024-11-06 17:30:53: 3,524,611,246 states generated (29,466,263 s/min), 345,366,358 distinct states found (2,800,083 ds/min), 88,316,673 states left on queue.
+Progress(38) at 2024-11-06 17:31:53: 3,553,819,331 states generated (29,208,085 s/min), 348,291,666 distinct states found (2,925,308 ds/min), 89,059,679 states left on queue.
+Progress(38) at 2024-11-06 17:32:53: 3,583,208,821 states generated (29,389,490 s/min), 350,796,636 distinct states found (2,504,970 ds/min), 89,478,521 states left on queue.
+Progress(38) at 2024-11-06 17:33:53: 3,612,329,910 states generated (29,121,089 s/min), 353,414,448 distinct states found (2,617,812 ds/min), 90,008,568 states left on queue.
+Progress(38) at 2024-11-06 17:34:53: 3,641,485,253 states generated (29,155,343 s/min), 356,010,441 distinct states found (2,595,993 ds/min), 90,486,313 states left on queue.
+Progress(38) at 2024-11-06 17:35:53: 3,670,761,645 states generated (29,276,392 s/min), 358,411,973 distinct states found (2,401,532 ds/min), 90,799,029 states left on queue.
+Progress(38) at 2024-11-06 17:36:53: 3,700,008,207 states generated (29,246,562 s/min), 360,943,422 distinct states found (2,531,449 ds/min), 91,235,694 states left on queue.
+Progress(38) at 2024-11-06 17:37:53: 3,729,045,761 states generated (29,037,554 s/min), 363,523,499 distinct states found (2,580,077 ds/min), 91,685,579 states left on queue.
+Progress(38) at 2024-11-06 17:38:53: 3,758,697,262 states generated (29,651,501 s/min), 365,860,396 distinct states found (2,336,897 ds/min), 92,003,313 states left on queue.
+Progress(38) at 2024-11-06 17:39:53: 3,788,188,489 states generated (29,491,227 s/min), 368,369,398 distinct states found (2,509,002 ds/min), 92,452,083 states left on queue.
+Progress(38) at 2024-11-06 17:40:53: 3,817,718,772 states generated (29,530,283 s/min), 370,855,965 distinct states found (2,486,567 ds/min), 92,899,812 states left on queue.
+Progress(38) at 2024-11-06 17:41:53: 3,847,372,748 states generated (29,653,976 s/min), 373,231,774 distinct states found (2,375,809 ds/min), 93,202,503 states left on queue.
+Progress(38) at 2024-11-06 17:42:53: 3,877,091,950 states generated (29,719,202 s/min), 375,934,374 distinct states found (2,702,600 ds/min), 93,775,105 states left on queue.
+Progress(38) at 2024-11-06 17:43:53: 3,906,843,295 states generated (29,751,345 s/min), 378,304,497 distinct states found (2,370,123 ds/min), 94,098,611 states left on queue.
+Progress(38) at 2024-11-06 17:44:53: 3,936,304,033 states generated (29,460,738 s/min), 380,793,774 distinct states found (2,489,277 ds/min), 94,560,398 states left on queue.
+Progress(38) at 2024-11-06 17:45:53: 3,965,687,311 states generated (29,383,278 s/min), 383,366,376 distinct states found (2,572,602 ds/min), 95,062,163 states left on queue.
+Progress(38) at 2024-11-06 17:46:53: 3,995,264,758 states generated (29,577,447 s/min), 385,832,314 distinct states found (2,465,938 ds/min), 95,460,777 states left on queue.
+Progress(38) at 2024-11-06 17:47:53: 4,024,519,333 states generated (29,254,575 s/min), 388,384,282 distinct states found (2,551,968 ds/min), 95,931,698 states left on queue.
+Progress(38) at 2024-11-06 17:48:53: 4,054,053,752 states generated (29,534,419 s/min), 390,990,581 distinct states found (2,606,299 ds/min), 96,493,705 states left on queue.
+Progress(38) at 2024-11-06 17:49:53: 4,083,403,606 states generated (29,349,854 s/min), 393,717,328 distinct states found (2,726,747 ds/min), 97,099,592 states left on queue.
+Progress(38) at 2024-11-06 17:50:53: 4,112,753,694 states generated (29,350,088 s/min), 396,441,909 distinct states found (2,724,581 ds/min), 97,694,523 states left on queue.
+Progress(38) at 2024-11-06 17:51:53: 4,141,940,951 states generated (29,187,257 s/min), 399,238,612 distinct states found (2,796,703 ds/min), 98,387,103 states left on queue.
+Progress(38) at 2024-11-06 17:52:53: 4,171,185,273 states generated (29,244,322 s/min), 401,861,376 distinct states found (2,622,764 ds/min), 98,900,168 states left on queue.
+Progress(38) at 2024-11-06 17:53:53: 4,200,735,055 states generated (29,549,782 s/min), 404,419,627 distinct states found (2,558,251 ds/min), 99,388,507 states left on queue.
+Progress(38) at 2024-11-06 17:54:53: 4,230,057,902 states generated (29,322,847 s/min), 406,926,477 distinct states found (2,506,850 ds/min), 99,826,562 states left on queue.
+Progress(38) at 2024-11-06 17:55:53: 4,259,279,515 states generated (29,221,613 s/min), 409,512,606 distinct states found (2,586,129 ds/min), 100,340,214 states left on queue.
+Progress(38) at 2024-11-06 17:56:53: 4,288,265,663 states generated (28,986,148 s/min), 412,254,402 distinct states found (2,741,796 ds/min), 100,966,036 states left on queue.
+Progress(38) at 2024-11-06 17:57:53: 4,316,798,413 states generated (28,532,750 s/min), 415,047,481 distinct states found (2,793,079 ds/min), 101,589,869 states left on queue.
+Progress(38) at 2024-11-06 17:58:53: 4,345,527,290 states generated (28,728,877 s/min), 417,768,588 distinct states found (2,721,107 ds/min), 102,133,503 states left on queue.
+Progress(38) at 2024-11-06 17:59:53: 4,374,924,942 states generated (29,397,652 s/min), 420,254,082 distinct states found (2,485,494 ds/min), 102,500,461 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 18:00:54)
+Progress(38) at 2024-11-06 18:00:54: 4,404,604,911 states generated (29,679,969 s/min), 422,801,691 distinct states found (2,547,609 ds/min), 102,936,440 states left on queue.
+Progress(38) at 2024-11-06 18:01:54: 4,434,018,901 states generated (29,413,990 s/min), 425,477,119 distinct states found (2,675,428 ds/min), 103,472,987 states left on queue.
+Progress(38) at 2024-11-06 18:02:54: 4,463,498,297 states generated (29,479,396 s/min), 427,949,289 distinct states found (2,472,170 ds/min), 103,858,839 states left on queue.
+Progress(38) at 2024-11-06 18:03:54: 4,492,775,931 states generated (29,277,634 s/min), 430,592,094 distinct states found (2,642,805 ds/min), 104,353,609 states left on queue.
+Progress(38) at 2024-11-06 18:04:54: 4,522,002,300 states generated (29,226,369 s/min), 433,322,584 distinct states found (2,730,490 ds/min), 104,949,753 states left on queue.
+Progress(38) at 2024-11-06 18:05:54: 4,551,375,180 states generated (29,372,880 s/min), 436,005,138 distinct states found (2,682,554 ds/min), 105,482,546 states left on queue.
+Progress(38) at 2024-11-06 18:06:54: 4,580,718,169 states generated (29,342,989 s/min), 438,516,579 distinct states found (2,511,441 ds/min), 105,868,435 states left on queue.
+Progress(38) at 2024-11-06 18:07:54: 4,609,859,344 states generated (29,141,175 s/min), 441,134,700 distinct states found (2,618,121 ds/min), 106,390,335 states left on queue.
+Progress(38) at 2024-11-06 18:08:54: 4,639,331,150 states generated (29,471,806 s/min), 443,662,679 distinct states found (2,527,979 ds/min), 106,821,264 states left on queue.
+Progress(38) at 2024-11-06 18:09:54: 4,668,696,820 states generated (29,365,670 s/min), 446,222,969 distinct states found (2,560,290 ds/min), 107,277,508 states left on queue.
+Progress(38) at 2024-11-06 18:10:54: 4,698,140,829 states generated (29,444,009 s/min), 448,693,022 distinct states found (2,470,053 ds/min), 107,654,262 states left on queue.
+Progress(38) at 2024-11-06 18:11:54: 4,727,380,985 states generated (29,240,156 s/min), 451,459,276 distinct states found (2,766,254 ds/min), 108,284,101 states left on queue.
+Progress(38) at 2024-11-06 18:12:54: 4,756,654,088 states generated (29,273,103 s/min), 454,180,180 distinct states found (2,720,904 ds/min), 108,879,205 states left on queue.
+Progress(38) at 2024-11-06 18:13:54: 4,785,893,104 states generated (29,239,016 s/min), 457,001,077 distinct states found (2,820,897 ds/min), 109,511,015 states left on queue.
+Progress(38) at 2024-11-06 18:14:54: 4,815,289,339 states generated (29,396,235 s/min), 459,530,340 distinct states found (2,529,263 ds/min), 109,951,588 states left on queue.
+Progress(38) at 2024-11-06 18:15:54: 4,844,354,767 states generated (29,065,428 s/min), 462,144,567 distinct states found (2,614,227 ds/min), 110,455,692 states left on queue.
+Progress(38) at 2024-11-06 18:16:54: 4,873,381,465 states generated (29,026,698 s/min), 464,718,128 distinct states found (2,573,561 ds/min), 110,936,992 states left on queue.
+Progress(38) at 2024-11-06 18:17:54: 4,902,616,179 states generated (29,234,714 s/min), 467,171,620 distinct states found (2,453,492 ds/min), 111,288,450 states left on queue.
+Progress(38) at 2024-11-06 18:18:54: 4,931,808,383 states generated (29,192,204 s/min), 469,593,253 distinct states found (2,421,633 ds/min), 111,607,240 states left on queue.
+Progress(38) at 2024-11-06 18:19:54: 4,961,319,800 states generated (29,511,417 s/min), 471,795,067 distinct states found (2,201,814 ds/min), 111,770,077 states left on queue.
+Progress(38) at 2024-11-06 18:20:54: 4,990,051,892 states generated (28,732,092 s/min), 474,595,717 distinct states found (2,800,650 ds/min), 112,380,795 states left on queue.
+Progress(38) at 2024-11-06 18:21:54: 5,019,620,389 states generated (29,568,497 s/min), 476,860,178 distinct states found (2,264,461 ds/min), 112,610,789 states left on queue.
+Progress(38) at 2024-11-06 18:22:54: 5,049,176,225 states generated (29,555,836 s/min), 479,117,000 distinct states found (2,256,822 ds/min), 112,849,809 states left on queue.
+Progress(38) at 2024-11-06 18:23:54: 5,078,659,511 states generated (29,483,286 s/min), 481,552,566 distinct states found (2,435,566 ds/min), 113,238,679 states left on queue.
+Progress(38) at 2024-11-06 18:24:54: 5,108,186,428 states generated (29,526,917 s/min), 483,970,290 distinct states found (2,417,724 ds/min), 113,645,974 states left on queue.
+Progress(38) at 2024-11-06 18:25:54: 5,137,766,496 states generated (29,580,068 s/min), 486,204,445 distinct states found (2,234,155 ds/min), 113,816,273 states left on queue.
+Progress(38) at 2024-11-06 18:26:54: 5,167,429,477 states generated (29,662,981 s/min), 488,726,479 distinct states found (2,522,034 ds/min), 114,265,425 states left on queue.
+Progress(38) at 2024-11-06 18:27:54: 5,197,227,715 states generated (29,798,238 s/min), 491,213,848 distinct states found (2,487,369 ds/min), 114,645,624 states left on queue.
+Progress(38) at 2024-11-06 18:28:54: 5,226,883,420 states generated (29,655,705 s/min), 493,480,968 distinct states found (2,267,120 ds/min), 114,901,786 states left on queue.
+Progress(38) at 2024-11-06 18:29:54: 5,256,355,905 states generated (29,472,485 s/min), 495,866,549 distinct states found (2,385,581 ds/min), 115,277,276 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 18:30:55)
+Progress(38) at 2024-11-06 18:30:55: 5,286,035,252 states generated (29,679,347 s/min), 498,324,679 distinct states found (2,458,130 ds/min), 115,663,015 states left on queue.
+Progress(38) at 2024-11-06 18:31:55: 5,315,467,724 states generated (29,432,472 s/min), 500,723,577 distinct states found (2,398,898 ds/min), 116,023,619 states left on queue.
+Progress(38) at 2024-11-06 18:32:55: 5,344,728,453 states generated (29,260,729 s/min), 503,156,876 distinct states found (2,433,299 ds/min), 116,384,801 states left on queue.
+Progress(38) at 2024-11-06 18:33:55: 5,374,055,231 states generated (29,326,778 s/min), 505,588,957 distinct states found (2,432,081 ds/min), 116,786,679 states left on queue.
+Progress(38) at 2024-11-06 18:34:55: 5,403,566,278 states generated (29,511,047 s/min), 508,096,703 distinct states found (2,507,746 ds/min), 117,258,425 states left on queue.
+Progress(38) at 2024-11-06 18:35:55: 5,432,770,932 states generated (29,204,654 s/min), 510,765,370 distinct states found (2,668,667 ds/min), 117,821,443 states left on queue.
+Progress(38) at 2024-11-06 18:36:55: 5,462,325,607 states generated (29,554,675 s/min), 513,306,027 distinct states found (2,540,657 ds/min), 118,252,946 states left on queue.
+Progress(38) at 2024-11-06 18:37:55: 5,491,531,381 states generated (29,205,774 s/min), 516,017,383 distinct states found (2,711,356 ds/min), 118,857,035 states left on queue.
+Progress(38) at 2024-11-06 18:38:55: 5,520,744,572 states generated (29,213,191 s/min), 518,696,783 distinct states found (2,679,400 ds/min), 119,445,954 states left on queue.
+Progress(38) at 2024-11-06 18:39:55: 5,549,903,819 states generated (29,159,247 s/min), 521,329,662 distinct states found (2,632,879 ds/min), 119,977,569 states left on queue.
+Progress(38) at 2024-11-06 18:40:55: 5,579,474,839 states generated (29,571,020 s/min), 523,702,578 distinct states found (2,372,916 ds/min), 120,289,041 states left on queue.
+Progress(38) at 2024-11-06 18:41:55: 5,608,757,550 states generated (29,282,711 s/min), 526,191,629 distinct states found (2,489,051 ds/min), 120,719,632 states left on queue.
+Progress(38) at 2024-11-06 18:42:55: 5,638,085,090 states generated (29,327,540 s/min), 528,478,505 distinct states found (2,286,876 ds/min), 120,990,568 states left on queue.
+Progress(38) at 2024-11-06 18:43:55: 5,667,141,833 states generated (29,056,743 s/min), 531,035,593 distinct states found (2,557,088 ds/min), 121,480,763 states left on queue.
+Progress(38) at 2024-11-06 18:44:55: 5,696,139,104 states generated (28,997,271 s/min), 533,684,330 distinct states found (2,648,737 ds/min), 122,027,516 states left on queue.
+Progress(38) at 2024-11-06 18:45:55: 5,724,868,902 states generated (28,729,798 s/min), 536,316,715 distinct states found (2,632,385 ds/min), 122,548,317 states left on queue.
+Progress(38) at 2024-11-06 18:46:55: 5,753,438,871 states generated (28,569,969 s/min), 539,001,028 distinct states found (2,684,313 ds/min), 123,041,578 states left on queue.
+Progress(38) at 2024-11-06 18:47:55: 5,782,391,778 states generated (28,952,907 s/min), 541,537,259 distinct states found (2,536,231 ds/min), 123,436,184 states left on queue.
+Progress(38) at 2024-11-06 18:48:55: 5,811,823,996 states generated (29,432,218 s/min), 543,896,432 distinct states found (2,359,173 ds/min), 123,698,698 states left on queue.
+Progress(38) at 2024-11-06 18:49:55: 5,841,258,941 states generated (29,434,945 s/min), 546,273,191 distinct states found (2,376,759 ds/min), 124,012,754 states left on queue.
+Progress(38) at 2024-11-06 18:50:55: 5,870,667,995 states generated (29,409,054 s/min), 548,835,686 distinct states found (2,562,495 ds/min), 124,450,482 states left on queue.
+Progress(38) at 2024-11-06 18:51:55: 5,900,038,718 states generated (29,370,723 s/min), 551,304,457 distinct states found (2,468,771 ds/min), 124,805,220 states left on queue.
+Progress(38) at 2024-11-06 18:52:55: 5,929,442,421 states generated (29,403,703 s/min), 553,776,296 distinct states found (2,471,839 ds/min), 125,178,608 states left on queue.
+Progress(38) at 2024-11-06 18:53:55: 5,958,838,496 states generated (29,396,075 s/min), 556,289,762 distinct states found (2,513,466 ds/min), 125,588,158 states left on queue.
+Progress(38) at 2024-11-06 18:54:55: 5,988,187,325 states generated (29,348,829 s/min), 558,898,224 distinct states found (2,608,462 ds/min), 126,074,377 states left on queue.
+Progress(38) at 2024-11-06 18:55:55: 6,017,546,111 states generated (29,358,786 s/min), 561,530,468 distinct states found (2,632,244 ds/min), 126,579,784 states left on queue.
+Progress(38) at 2024-11-06 18:56:55: 6,046,777,143 states generated (29,231,032 s/min), 564,182,546 distinct states found (2,652,078 ds/min), 127,037,883 states left on queue.
+Progress(39) at 2024-11-06 18:57:55: 6,076,111,479 states generated (29,334,336 s/min), 566,509,898 distinct states found (2,327,352 ds/min), 127,319,036 states left on queue.
+Progress(39) at 2024-11-06 18:58:55: 6,105,215,668 states generated (29,104,189 s/min), 569,000,954 distinct states found (2,491,056 ds/min), 127,724,185 states left on queue.
+Progress(39) at 2024-11-06 18:59:55: 6,134,619,650 states generated (29,403,982 s/min), 571,444,199 distinct states found (2,443,245 ds/min), 128,083,849 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 19:00:55)
+Progress(39) at 2024-11-06 19:00:55: 6,164,303,226 states generated (29,683,576 s/min), 574,046,920 distinct states found (2,602,721 ds/min), 128,537,330 states left on queue.
+Progress(39) at 2024-11-06 19:01:55: 6,193,710,515 states generated (29,407,289 s/min), 576,294,161 distinct states found (2,247,241 ds/min), 128,749,186 states left on queue.
+Progress(39) at 2024-11-06 19:02:55: 6,223,050,437 states generated (29,339,922 s/min), 578,840,811 distinct states found (2,546,650 ds/min), 129,198,375 states left on queue.
+Progress(39) at 2024-11-06 19:03:55: 6,252,273,339 states generated (29,222,902 s/min), 581,530,481 distinct states found (2,689,670 ds/min), 129,745,195 states left on queue.
+Progress(39) at 2024-11-06 19:04:55: 6,281,535,213 states generated (29,261,874 s/min), 584,206,969 distinct states found (2,676,488 ds/min), 130,306,182 states left on queue.
+Progress(39) at 2024-11-06 19:05:55: 6,310,569,147 states generated (29,033,934 s/min), 587,031,959 distinct states found (2,824,990 ds/min), 130,922,629 states left on queue.
+Progress(39) at 2024-11-06 19:06:55: 6,339,951,741 states generated (29,382,594 s/min), 589,709,668 distinct states found (2,677,709 ds/min), 131,483,555 states left on queue.
+Progress(39) at 2024-11-06 19:07:55: 6,369,354,481 states generated (29,402,740 s/min), 591,964,654 distinct states found (2,254,986 ds/min), 131,688,532 states left on queue.
+Progress(39) at 2024-11-06 19:08:55: 6,398,254,591 states generated (28,900,110 s/min), 594,604,924 distinct states found (2,640,270 ds/min), 132,195,069 states left on queue.
+Progress(39) at 2024-11-06 19:09:55: 6,427,422,756 states generated (29,168,165 s/min), 597,059,083 distinct states found (2,454,159 ds/min), 132,571,626 states left on queue.
+Progress(39) at 2024-11-06 19:10:55: 6,456,469,721 states generated (29,046,965 s/min), 599,400,317 distinct states found (2,341,234 ds/min), 132,826,474 states left on queue.
+Progress(39) at 2024-11-06 19:11:55: 6,485,733,442 states generated (29,263,721 s/min), 602,040,336 distinct states found (2,640,019 ds/min), 133,286,664 states left on queue.
+Progress(39) at 2024-11-06 19:12:55: 6,515,001,998 states generated (29,268,556 s/min), 604,003,958 distinct states found (1,963,622 ds/min), 133,255,252 states left on queue.
+Progress(39) at 2024-11-06 19:13:55: 6,544,172,146 states generated (29,170,148 s/min), 606,473,164 distinct states found (2,469,206 ds/min), 133,627,323 states left on queue.
+Progress(39) at 2024-11-06 19:14:55: 6,572,975,355 states generated (28,803,209 s/min), 609,043,606 distinct states found (2,570,442 ds/min), 134,023,262 states left on queue.
+Progress(39) at 2024-11-06 19:15:55: 6,602,534,934 states generated (29,559,579 s/min), 611,212,652 distinct states found (2,169,046 ds/min), 134,205,070 states left on queue.
+Progress(39) at 2024-11-06 19:16:55: 6,632,044,851 states generated (29,509,917 s/min), 613,377,378 distinct states found (2,164,726 ds/min), 134,360,577 states left on queue.
+Progress(39) at 2024-11-06 19:17:55: 6,661,465,356 states generated (29,420,505 s/min), 615,729,605 distinct states found (2,352,227 ds/min), 134,679,148 states left on queue.
+Progress(39) at 2024-11-06 19:18:55: 6,690,848,776 states generated (29,383,420 s/min), 618,034,126 distinct states found (2,304,521 ds/min), 134,989,999 states left on queue.
+Progress(39) at 2024-11-06 19:19:55: 6,720,362,641 states generated (29,513,865 s/min), 620,264,990 distinct states found (2,230,864 ds/min), 135,213,527 states left on queue.
+Progress(39) at 2024-11-06 19:20:55: 6,749,995,972 states generated (29,633,331 s/min), 622,424,423 distinct states found (2,159,433 ds/min), 135,336,269 states left on queue.
+Progress(39) at 2024-11-06 19:21:55: 6,779,641,479 states generated (29,645,507 s/min), 624,953,002 distinct states found (2,528,579 ds/min), 135,781,717 states left on queue.
+Progress(39) at 2024-11-06 19:22:55: 6,809,496,805 states generated (29,855,326 s/min), 627,297,563 distinct states found (2,344,561 ds/min), 136,040,988 states left on queue.
+Progress(39) at 2024-11-06 19:23:55: 6,839,096,708 states generated (29,599,903 s/min), 629,464,688 distinct states found (2,167,125 ds/min), 136,210,971 states left on queue.
+Progress(39) at 2024-11-06 19:24:55: 6,868,614,311 states generated (29,517,603 s/min), 631,704,627 distinct states found (2,239,939 ds/min), 136,469,731 states left on queue.
+Progress(39) at 2024-11-06 19:25:55: 6,897,932,930 states generated (29,318,619 s/min), 633,961,042 distinct states found (2,256,415 ds/min), 136,714,912 states left on queue.
+Progress(39) at 2024-11-06 19:26:55: 6,927,200,602 states generated (29,267,672 s/min), 636,414,800 distinct states found (2,453,758 ds/min), 137,101,547 states left on queue.
+Progress(39) at 2024-11-06 19:27:55: 6,956,755,074 states generated (29,554,472 s/min), 638,616,489 distinct states found (2,201,689 ds/min), 137,285,238 states left on queue.
+Progress(39) at 2024-11-06 19:28:55: 6,985,926,285 states generated (29,171,211 s/min), 640,970,274 distinct states found (2,353,785 ds/min), 137,592,586 states left on queue.
+Progress(39) at 2024-11-06 19:29:55: 7,015,240,294 states generated (29,314,009 s/min), 643,310,280 distinct states found (2,340,006 ds/min), 137,914,322 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 19:30:56)
+Progress(39) at 2024-11-06 19:30:56: 7,045,112,039 states generated (29,871,745 s/min), 645,650,251 distinct states found (2,339,971 ds/min), 138,248,533 states left on queue.
+Progress(39) at 2024-11-06 19:31:56: 7,074,347,122 states generated (29,235,083 s/min), 648,286,341 distinct states found (2,636,090 ds/min), 138,800,606 states left on queue.
+Progress(39) at 2024-11-06 19:32:56: 7,103,701,427 states generated (29,354,305 s/min), 650,776,754 distinct states found (2,490,413 ds/min), 139,200,935 states left on queue.
+Progress(39) at 2024-11-06 19:33:56: 7,133,125,574 states generated (29,424,147 s/min), 653,222,778 distinct states found (2,446,024 ds/min), 139,553,972 states left on queue.
+Progress(39) at 2024-11-06 19:34:56: 7,162,393,954 states generated (29,268,380 s/min), 655,812,815 distinct states found (2,590,037 ds/min), 140,051,736 states left on queue.
+Progress(39) at 2024-11-06 19:35:56: 7,191,614,309 states generated (29,220,355 s/min), 658,388,779 distinct states found (2,575,964 ds/min), 140,550,430 states left on queue.
+Progress(39) at 2024-11-06 19:36:56: 7,220,841,977 states generated (29,227,668 s/min), 660,885,901 distinct states found (2,497,122 ds/min), 140,973,038 states left on queue.
+Progress(39) at 2024-11-06 19:37:56: 7,250,020,241 states generated (29,178,264 s/min), 663,335,701 distinct states found (2,449,800 ds/min), 141,327,800 states left on queue.
+Progress(39) at 2024-11-06 19:38:56: 7,279,545,923 states generated (29,525,682 s/min), 665,706,252 distinct states found (2,370,551 ds/min), 141,666,628 states left on queue.
+Progress(39) at 2024-11-06 19:39:56: 7,308,806,585 states generated (29,260,662 s/min), 668,059,763 distinct states found (2,353,511 ds/min), 141,985,139 states left on queue.
+Progress(39) at 2024-11-06 19:40:56: 7,338,028,888 states generated (29,222,303 s/min), 670,241,848 distinct states found (2,182,085 ds/min), 142,169,842 states left on queue.
+Progress(39) at 2024-11-06 19:41:56: 7,367,241,753 states generated (29,212,865 s/min), 672,613,255 distinct states found (2,371,407 ds/min), 142,507,724 states left on queue.
+Progress(39) at 2024-11-06 19:42:56: 7,396,269,434 states generated (29,027,681 s/min), 675,112,517 distinct states found (2,499,262 ds/min), 142,941,967 states left on queue.
+Progress(39) at 2024-11-06 19:43:56: 7,425,237,701 states generated (28,968,267 s/min), 677,646,850 distinct states found (2,534,333 ds/min), 143,388,301 states left on queue.
+Progress(39) at 2024-11-06 19:44:56: 7,453,929,312 states generated (28,691,611 s/min), 680,183,486 distinct states found (2,536,636 ds/min), 143,823,998 states left on queue.
+Progress(39) at 2024-11-06 19:45:56: 7,482,605,282 states generated (28,675,970 s/min), 682,751,269 distinct states found (2,567,783 ds/min), 144,211,694 states left on queue.
+Progress(39) at 2024-11-06 19:46:56: 7,511,402,194 states generated (28,796,912 s/min), 685,177,338 distinct states found (2,426,069 ds/min), 144,502,576 states left on queue.
+Progress(39) at 2024-11-06 19:47:56: 7,540,667,315 states generated (29,265,121 s/min), 687,470,422 distinct states found (2,293,084 ds/min), 144,717,485 states left on queue.
+Progress(39) at 2024-11-06 19:48:56: 7,570,065,371 states generated (29,398,056 s/min), 689,724,172 distinct states found (2,253,750 ds/min), 144,895,541 states left on queue.
+Progress(39) at 2024-11-06 19:49:56: 7,599,596,791 states generated (29,531,420 s/min), 692,064,101 distinct states found (2,339,929 ds/min), 145,171,911 states left on queue.
+Progress(39) at 2024-11-06 19:50:56: 7,629,011,363 states generated (29,414,572 s/min), 694,540,161 distinct states found (2,476,060 ds/min), 145,540,423 states left on queue.
+Progress(39) at 2024-11-06 19:51:56: 7,658,453,965 states generated (29,442,602 s/min), 696,912,122 distinct states found (2,371,961 ds/min), 145,809,567 states left on queue.
+Progress(39) at 2024-11-06 19:52:56: 7,687,913,137 states generated (29,459,172 s/min), 699,240,630 distinct states found (2,328,508 ds/min), 146,098,273 states left on queue.
+Progress(39) at 2024-11-06 19:53:56: 7,717,161,254 states generated (29,248,117 s/min), 701,789,915 distinct states found (2,549,285 ds/min), 146,502,121 states left on queue.
+Progress(39) at 2024-11-06 19:54:56: 7,746,587,948 states generated (29,426,694 s/min), 704,037,014 distinct states found (2,247,099 ds/min), 146,684,369 states left on queue.
+Progress(39) at 2024-11-06 19:55:56: 7,775,767,241 states generated (29,179,293 s/min), 706,750,225 distinct states found (2,713,211 ds/min), 147,270,858 states left on queue.
+Progress(39) at 2024-11-06 19:56:56: 7,805,143,313 states generated (29,376,072 s/min), 709,214,940 distinct states found (2,464,715 ds/min), 147,627,166 states left on queue.
+Progress(39) at 2024-11-06 19:57:56: 7,834,403,478 states generated (29,260,165 s/min), 711,759,633 distinct states found (2,544,693 ds/min), 147,996,842 states left on queue.
+Progress(40) at 2024-11-06 19:58:56: 7,863,785,909 states generated (29,382,431 s/min), 713,915,903 distinct states found (2,156,270 ds/min), 148,107,480 states left on queue.
+Progress(40) at 2024-11-06 19:59:56: 7,892,661,923 states generated (28,876,014 s/min), 716,529,052 distinct states found (2,613,149 ds/min), 148,615,346 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 20:00:57)
+Progress(40) at 2024-11-06 20:00:57: 7,922,354,868 states generated (29,692,945 s/min), 718,724,840 distinct states found (2,195,788 ds/min), 148,760,464 states left on queue.
+Progress(40) at 2024-11-06 20:01:57: 7,951,821,345 states generated (29,466,477 s/min), 721,199,790 distinct states found (2,474,950 ds/min), 149,133,458 states left on queue.
+Progress(40) at 2024-11-06 20:02:57: 7,981,212,562 states generated (29,391,217 s/min), 723,637,084 distinct states found (2,437,294 ds/min), 149,453,388 states left on queue.
+Progress(40) at 2024-11-06 20:03:57: 8,010,639,344 states generated (29,426,782 s/min), 725,776,597 distinct states found (2,139,513 ds/min), 149,580,205 states left on queue.
+Progress(40) at 2024-11-06 20:04:57: 8,039,970,078 states generated (29,330,734 s/min), 728,145,896 distinct states found (2,369,299 ds/min), 149,873,787 states left on queue.
+Progress(40) at 2024-11-06 20:05:57: 8,069,221,501 states generated (29,251,423 s/min), 730,835,980 distinct states found (2,690,084 ds/min), 150,431,663 states left on queue.
+Progress(40) at 2024-11-06 20:06:57: 8,098,568,645 states generated (29,347,144 s/min), 733,266,238 distinct states found (2,430,258 ds/min), 150,772,190 states left on queue.
+Progress(40) at 2024-11-06 20:07:57: 8,127,646,970 states generated (29,078,325 s/min), 736,001,441 distinct states found (2,735,203 ds/min), 151,368,297 states left on queue.
+Progress(40) at 2024-11-06 20:08:57: 8,156,755,007 states generated (29,108,037 s/min), 738,759,675 distinct states found (2,758,234 ds/min), 151,912,929 states left on queue.
+Progress(40) at 2024-11-06 20:09:57: 8,186,234,810 states generated (29,479,803 s/min), 741,336,146 distinct states found (2,576,471 ds/min), 152,376,828 states left on queue.
+Progress(40) at 2024-11-06 20:10:57: 8,215,641,994 states generated (29,407,184 s/min), 743,647,353 distinct states found (2,311,207 ds/min), 152,617,899 states left on queue.
+Progress(40) at 2024-11-06 20:11:57: 8,244,746,445 states generated (29,104,451 s/min), 746,080,007 distinct states found (2,432,654 ds/min), 152,939,104 states left on queue.
+Progress(40) at 2024-11-06 20:12:57: 8,273,514,095 states generated (28,767,650 s/min), 748,726,701 distinct states found (2,646,694 ds/min), 153,445,645 states left on queue.
+Progress(40) at 2024-11-06 20:13:57: 8,302,647,011 states generated (29,132,916 s/min), 751,041,420 distinct states found (2,314,719 ds/min), 153,711,631 states left on queue.
+Progress(40) at 2024-11-06 20:14:57: 8,331,785,512 states generated (29,138,501 s/min), 753,262,324 distinct states found (2,220,904 ds/min), 153,861,206 states left on queue.
+Progress(40) at 2024-11-06 20:15:57: 8,361,058,813 states generated (29,273,301 s/min), 755,881,803 distinct states found (2,619,479 ds/min), 154,293,451 states left on queue.
+Progress(40) at 2024-11-06 20:16:57: 8,390,323,842 states generated (29,265,029 s/min), 757,769,813 distinct states found (1,888,010 ds/min), 154,184,183 states left on queue.
+Progress(40) at 2024-11-06 20:17:57: 8,419,579,524 states generated (29,255,682 s/min), 760,009,795 distinct states found (2,239,982 ds/min), 154,382,656 states left on queue.
+Progress(40) at 2024-11-06 20:18:57: 8,448,394,343 states generated (28,814,819 s/min), 762,597,225 distinct states found (2,587,430 ds/min), 154,795,314 states left on queue.
+Progress(40) at 2024-11-06 20:19:57: 8,477,530,142 states generated (29,135,799 s/min), 764,903,184 distinct states found (2,305,959 ds/min), 154,997,361 states left on queue.
+Progress(40) at 2024-11-06 20:20:57: 8,507,035,930 states generated (29,505,788 s/min), 766,887,142 distinct states found (1,983,958 ds/min), 155,034,831 states left on queue.
+Progress(40) at 2024-11-06 20:21:57: 8,536,505,703 states generated (29,469,773 s/min), 769,048,483 distinct states found (2,161,341 ds/min), 155,183,742 states left on queue.
+Progress(40) at 2024-11-06 20:22:57: 8,565,867,584 states generated (29,361,881 s/min), 771,258,076 distinct states found (2,209,593 ds/min), 155,385,262 states left on queue.
+Progress(40) at 2024-11-06 20:23:57: 8,595,185,764 states generated (29,318,180 s/min), 773,454,985 distinct states found (2,196,909 ds/min), 155,614,111 states left on queue.
+Progress(40) at 2024-11-06 20:24:57: 8,624,496,269 states generated (29,310,505 s/min), 775,619,630 distinct states found (2,164,645 ds/min), 155,798,174 states left on queue.
+Progress(40) at 2024-11-06 20:25:57: 8,654,080,073 states generated (29,583,804 s/min), 777,637,410 distinct states found (2,017,780 ds/min), 155,782,045 states left on queue.
+Progress(40) at 2024-11-06 20:26:57: 8,683,722,009 states generated (29,641,936 s/min), 779,940,399 distinct states found (2,302,989 ds/min), 156,073,330 states left on queue.
+Progress(40) at 2024-11-06 20:27:57: 8,713,410,725 states generated (29,688,716 s/min), 782,406,987 distinct states found (2,466,588 ds/min), 156,445,902 states left on queue.
+Progress(40) at 2024-11-06 20:28:57: 8,743,158,002 states generated (29,747,277 s/min), 784,542,609 distinct states found (2,135,622 ds/min), 156,539,841 states left on queue.
+Progress(40) at 2024-11-06 20:29:57: 8,772,688,809 states generated (29,530,807 s/min), 786,583,608 distinct states found (2,040,999 ds/min), 156,630,041 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 20:30:57)
+Progress(40) at 2024-11-06 20:30:57: 8,802,299,219 states generated (29,610,410 s/min), 788,709,007 distinct states found (2,125,399 ds/min), 156,780,966 states left on queue.
+Progress(40) at 2024-11-06 20:31:57: 8,831,545,663 states generated (29,246,444 s/min), 790,874,634 distinct states found (2,165,627 ds/min), 156,943,688 states left on queue.
+Progress(40) at 2024-11-06 20:32:57: 8,860,742,526 states generated (29,196,863 s/min), 793,218,612 distinct states found (2,343,978 ds/min), 157,247,738 states left on queue.
+Progress(40) at 2024-11-06 20:33:57: 8,890,145,689 states generated (29,403,163 s/min), 795,347,746 distinct states found (2,129,134 ds/min), 157,376,715 states left on queue.
+Progress(40) at 2024-11-06 20:34:57: 8,919,277,440 states generated (29,131,751 s/min), 797,557,991 distinct states found (2,210,245 ds/min), 157,566,508 states left on queue.
+Progress(40) at 2024-11-06 20:35:57: 8,948,368,355 states generated (29,090,915 s/min), 799,870,441 distinct states found (2,312,450 ds/min), 157,825,337 states left on queue.
+Progress(40) at 2024-11-06 20:36:57: 8,977,811,769 states generated (29,443,414 s/min), 801,992,418 distinct states found (2,121,977 ds/min), 158,015,008 states left on queue.
+Progress(40) at 2024-11-06 20:37:57: 9,007,285,675 states generated (29,473,906 s/min), 804,250,024 distinct states found (2,257,606 ds/min), 158,295,507 states left on queue.
+Progress(40) at 2024-11-06 20:38:57: 9,036,450,953 states generated (29,165,278 s/min), 806,795,860 distinct states found (2,545,836 ds/min), 158,767,907 states left on queue.
+Progress(40) at 2024-11-06 20:39:57: 9,065,704,268 states generated (29,253,315 s/min), 809,198,438 distinct states found (2,402,578 ds/min), 159,105,121 states left on queue.
+Progress(40) at 2024-11-06 20:40:57: 9,095,165,427 states generated (29,461,159 s/min), 811,512,584 distinct states found (2,314,146 ds/min), 159,345,117 states left on queue.
+Progress(40) at 2024-11-06 20:41:57: 9,124,541,297 states generated (29,375,870 s/min), 813,905,920 distinct states found (2,393,336 ds/min), 159,672,325 states left on queue.
+Progress(40) at 2024-11-06 20:42:57: 9,153,712,591 states generated (29,171,294 s/min), 816,392,570 distinct states found (2,486,650 ds/min), 160,082,547 states left on queue.
+Progress(40) at 2024-11-06 20:43:57: 9,182,920,866 states generated (29,208,275 s/min), 818,845,538 distinct states found (2,452,968 ds/min), 160,476,056 states left on queue.
+Progress(40) at 2024-11-06 20:44:57: 9,212,093,614 states generated (29,172,748 s/min), 821,212,595 distinct states found (2,367,057 ds/min), 160,787,698 states left on queue.
+Progress(40) at 2024-11-06 20:45:57: 9,241,177,362 states generated (29,083,748 s/min), 823,731,111 distinct states found (2,518,516 ds/min), 161,227,975 states left on queue.
+Progress(40) at 2024-11-06 20:46:57: 9,270,666,448 states generated (29,489,086 s/min), 825,877,262 distinct states found (2,146,151 ds/min), 161,339,209 states left on queue.
+Progress(40) at 2024-11-06 20:47:57: 9,299,985,513 states generated (29,319,065 s/min), 828,195,512 distinct states found (2,318,250 ds/min), 161,644,069 states left on queue.
+Progress(40) at 2024-11-06 20:48:57: 9,329,155,005 states generated (29,169,492 s/min), 830,386,518 distinct states found (2,191,006 ds/min), 161,807,802 states left on queue.
+Progress(40) at 2024-11-06 20:49:57: 9,358,433,771 states generated (29,278,766 s/min), 832,419,931 distinct states found (2,033,413 ds/min), 161,882,018 states left on queue.
+Progress(40) at 2024-11-06 20:50:57: 9,387,665,287 states generated (29,231,516 s/min), 834,751,267 distinct states found (2,331,336 ds/min), 162,183,217 states left on queue.
+Progress(40) at 2024-11-06 20:51:57: 9,416,697,647 states generated (29,032,360 s/min), 837,127,657 distinct states found (2,376,390 ds/min), 162,511,558 states left on queue.
+Progress(40) at 2024-11-06 20:52:57: 9,445,747,666 states generated (29,050,019 s/min), 839,556,372 distinct states found (2,428,715 ds/min), 162,873,418 states left on queue.
+Progress(40) at 2024-11-06 20:53:57: 9,474,599,613 states generated (28,851,947 s/min), 841,985,780 distinct states found (2,429,408 ds/min), 163,231,531 states left on queue.
+Progress(40) at 2024-11-06 20:54:57: 9,503,408,525 states generated (28,808,912 s/min), 844,368,680 distinct states found (2,382,900 ds/min), 163,533,407 states left on queue.
+Progress(40) at 2024-11-06 20:55:57: 9,532,128,492 states generated (28,719,967 s/min), 846,804,519 distinct states found (2,435,839 ds/min), 163,787,695 states left on queue.
+Progress(40) at 2024-11-06 20:56:57: 9,560,935,598 states generated (28,807,106 s/min), 849,075,143 distinct states found (2,270,624 ds/min), 163,946,240 states left on queue.
+Progress(40) at 2024-11-06 20:57:57: 9,590,127,374 states generated (29,191,776 s/min), 851,260,378 distinct states found (2,185,235 ds/min), 164,077,372 states left on queue.
+Progress(40) at 2024-11-06 20:58:57: 9,619,514,341 states generated (29,386,967 s/min), 853,352,738 distinct states found (2,092,360 ds/min), 164,118,186 states left on queue.
+Progress(40) at 2024-11-06 20:59:57: 9,648,985,302 states generated (29,470,961 s/min), 855,543,408 distinct states found (2,190,670 ds/min), 164,279,076 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 21:00:58)
+Progress(40) at 2024-11-06 21:00:58: 9,678,677,722 states generated (29,692,420 s/min), 857,894,775 distinct states found (2,351,367 ds/min), 164,516,395 states left on queue.
+Progress(40) at 2024-11-06 21:01:58: 9,708,095,509 states generated (29,417,787 s/min), 860,383,155 distinct states found (2,488,380 ds/min), 164,898,153 states left on queue.
+Progress(40) at 2024-11-06 21:02:58: 9,737,488,378 states generated (29,392,869 s/min), 862,497,194 distinct states found (2,114,039 ds/min), 164,966,010 states left on queue.
+Progress(40) at 2024-11-06 21:03:58: 9,766,895,552 states generated (29,407,174 s/min), 864,819,525 distinct states found (2,322,331 ds/min), 165,232,701 states left on queue.
+Progress(40) at 2024-11-06 21:04:58: 9,796,208,300 states generated (29,312,748 s/min), 867,276,841 distinct states found (2,457,316 ds/min), 165,568,613 states left on queue.
+Progress(40) at 2024-11-06 21:05:58: 9,825,603,726 states generated (29,395,426 s/min), 869,434,526 distinct states found (2,157,685 ds/min), 165,685,610 states left on queue.
+Progress(40) at 2024-11-06 21:06:58: 9,854,789,772 states generated (29,186,046 s/min), 871,934,034 distinct states found (2,499,508 ds/min), 166,084,000 states left on queue.
+Progress(40) at 2024-11-06 21:07:58: 9,884,028,390 states generated (29,238,618 s/min), 874,443,659 distinct states found (2,509,625 ds/min), 166,483,652 states left on queue.
+Progress(40) at 2024-11-06 21:08:58: 9,913,377,669 states generated (29,349,279 s/min), 876,803,913 distinct states found (2,360,254 ds/min), 166,740,702 states left on queue.
+Progress(40) at 2024-11-06 21:09:58: 9,942,721,749 states generated (29,344,080 s/min), 879,187,270 distinct states found (2,383,357 ds/min), 166,953,562 states left on queue.
+Progress(41) at 2024-11-06 21:10:58: 9,972,078,704 states generated (29,356,955 s/min), 881,233,361 distinct states found (2,046,091 ds/min), 166,999,841 states left on queue.
+Progress(41) at 2024-11-06 21:11:58: 10,000,914,792 states generated (28,836,088 s/min), 883,811,441 distinct states found (2,578,080 ds/min), 167,466,583 states left on queue.
+Progress(41) at 2024-11-06 21:12:58: 10,030,210,434 states generated (29,295,642 s/min), 885,899,950 distinct states found (2,088,509 ds/min), 167,531,826 states left on queue.
+Progress(41) at 2024-11-06 21:13:58: 10,059,587,070 states generated (29,376,636 s/min), 888,188,669 distinct states found (2,288,719 ds/min), 167,753,242 states left on queue.
+Progress(41) at 2024-11-06 21:14:58: 10,089,078,901 states generated (29,491,831 s/min), 890,649,997 distinct states found (2,461,328 ds/min), 168,098,890 states left on queue.
+Progress(41) at 2024-11-06 21:15:58: 10,118,348,352 states generated (29,269,451 s/min), 892,695,892 distinct states found (2,045,895 ds/min), 168,141,532 states left on queue.
+Progress(41) at 2024-11-06 21:16:58: 10,147,644,676 states generated (29,296,324 s/min), 894,823,997 distinct states found (2,128,105 ds/min), 168,231,032 states left on queue.
+Progress(41) at 2024-11-06 21:17:58: 10,176,967,773 states generated (29,323,097 s/min), 897,225,523 distinct states found (2,401,526 ds/min), 168,555,740 states left on queue.
+Progress(41) at 2024-11-06 21:18:58: 10,206,275,174 states generated (29,307,401 s/min), 899,814,626 distinct states found (2,589,103 ds/min), 169,020,971 states left on queue.
+Progress(41) at 2024-11-06 21:19:58: 10,235,593,993 states generated (29,318,819 s/min), 902,141,356 distinct states found (2,326,730 ds/min), 169,267,251 states left on queue.
+Progress(41) at 2024-11-06 21:20:58: 10,264,799,049 states generated (29,205,056 s/min), 904,746,333 distinct states found (2,604,977 ds/min), 169,758,459 states left on queue.
+Progress(41) at 2024-11-06 21:21:58: 10,293,910,586 states generated (29,111,537 s/min), 907,433,182 distinct states found (2,686,849 ds/min), 170,277,176 states left on queue.
+Progress(41) at 2024-11-06 21:22:58: 10,323,190,750 states generated (29,280,164 s/min), 910,052,108 distinct states found (2,618,926 ds/min), 170,695,212 states left on queue.
+Progress(41) at 2024-11-06 21:23:58: 10,352,580,182 states generated (29,389,432 s/min), 912,516,064 distinct states found (2,463,956 ds/min), 171,083,771 states left on queue.
+Progress(41) at 2024-11-06 21:24:58: 10,381,951,479 states generated (29,371,297 s/min), 914,781,443 distinct states found (2,265,379 ds/min), 171,281,545 states left on queue.
+Progress(41) at 2024-11-06 21:25:58: 10,411,026,945 states generated (29,075,466 s/min), 917,078,052 distinct states found (2,296,609 ds/min), 171,498,613 states left on queue.
+Progress(41) at 2024-11-06 21:26:58: 10,439,904,441 states generated (28,877,496 s/min), 919,547,808 distinct states found (2,469,756 ds/min), 171,860,589 states left on queue.
+Progress(41) at 2024-11-06 21:27:58: 10,469,008,600 states generated (29,104,159 s/min), 921,912,547 distinct states found (2,364,739 ds/min), 172,121,551 states left on queue.
+Progress(41) at 2024-11-06 21:28:58: 10,497,834,986 states generated (28,826,386 s/min), 924,235,840 distinct states found (2,323,293 ds/min), 172,353,661 states left on queue.
+Progress(41) at 2024-11-06 21:29:58: 10,527,064,696 states generated (29,229,710 s/min), 926,456,744 distinct states found (2,220,904 ds/min), 172,508,439 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 21:30:59)
+Progress(41) at 2024-11-06 21:30:59: 10,556,579,142 states generated (29,514,446 s/min), 928,988,872 distinct states found (2,532,128 ds/min), 172,833,183 states left on queue.
+Progress(41) at 2024-11-06 21:31:59: 10,585,719,909 states generated (29,140,767 s/min), 930,745,149 distinct states found (1,756,277 ds/min), 172,622,496 states left on queue.
+Progress(41) at 2024-11-06 21:32:59: 10,614,881,115 states generated (29,161,206 s/min), 932,948,083 distinct states found (2,202,934 ds/min), 172,792,818 states left on queue.
+Progress(41) at 2024-11-06 21:33:59: 10,643,693,909 states generated (28,812,794 s/min), 935,441,862 distinct states found (2,493,779 ds/min), 173,119,721 states left on queue.
+Progress(41) at 2024-11-06 21:34:59: 10,672,671,166 states generated (28,977,257 s/min), 937,653,961 distinct states found (2,212,099 ds/min), 173,216,843 states left on queue.
+Progress(41) at 2024-11-06 21:35:59: 10,702,072,440 states generated (29,401,274 s/min), 939,638,920 distinct states found (1,984,959 ds/min), 173,254,076 states left on queue.
+Progress(41) at 2024-11-06 21:36:59: 10,731,415,292 states generated (29,342,852 s/min), 941,583,653 distinct states found (1,944,733 ds/min), 173,229,968 states left on queue.
+Progress(41) at 2024-11-06 21:37:59: 10,760,802,656 states generated (29,387,364 s/min), 943,770,610 distinct states found (2,186,957 ds/min), 173,412,799 states left on queue.
+Progress(41) at 2024-11-06 21:38:59: 10,789,961,996 states generated (29,159,340 s/min), 945,790,519 distinct states found (2,019,909 ds/min), 173,482,204 states left on queue.
+Progress(41) at 2024-11-06 21:39:59: 10,819,303,972 states generated (29,341,976 s/min), 947,902,156 distinct states found (2,111,637 ds/min), 173,640,941 states left on queue.
+Progress(41) at 2024-11-06 21:40:59: 10,848,636,471 states generated (29,332,499 s/min), 949,908,145 distinct states found (2,005,989 ds/min), 173,684,074 states left on queue.
+Progress(41) at 2024-11-06 21:41:59: 10,878,207,345 states generated (29,570,874 s/min), 951,870,784 distinct states found (1,962,639 ds/min), 173,648,255 states left on queue.
+Progress(41) at 2024-11-06 21:42:59: 10,907,777,091 states generated (29,569,746 s/min), 954,123,321 distinct states found (2,252,537 ds/min), 173,881,583 states left on queue.
+Progress(41) at 2024-11-06 21:43:59: 10,937,383,465 states generated (29,606,374 s/min), 956,486,701 distinct states found (2,363,380 ds/min), 174,173,694 states left on queue.
+Progress(41) at 2024-11-06 21:44:59: 10,967,070,713 states generated (29,687,248 s/min), 958,539,717 distinct states found (2,053,016 ds/min), 174,194,592 states left on queue.
+Progress(41) at 2024-11-06 21:45:59: 10,996,524,132 states generated (29,453,419 s/min), 960,439,766 distinct states found (1,900,049 ds/min), 174,165,777 states left on queue.
+Progress(41) at 2024-11-06 21:46:59: 11,025,919,452 states generated (29,395,320 s/min), 962,518,661 distinct states found (2,078,895 ds/min), 174,284,642 states left on queue.
+Progress(41) at 2024-11-06 21:47:59: 11,055,087,136 states generated (29,167,684 s/min), 964,440,130 distinct states found (1,921,469 ds/min), 174,253,951 states left on queue.
+Progress(41) at 2024-11-06 21:48:59: 11,084,346,164 states generated (29,259,028 s/min), 966,652,841 distinct states found (2,212,711 ds/min), 174,452,762 states left on queue.
+Progress(41) at 2024-11-06 21:49:59: 11,113,503,996 states generated (29,157,832 s/min), 968,786,590 distinct states found (2,133,749 ds/min), 174,578,147 states left on queue.
+Progress(41) at 2024-11-06 21:50:59: 11,142,862,327 states generated (29,358,331 s/min), 970,780,918 distinct states found (1,994,328 ds/min), 174,585,050 states left on queue.
+Progress(41) at 2024-11-06 21:51:59: 11,171,907,560 states generated (29,045,233 s/min), 972,924,432 distinct states found (2,143,514 ds/min), 174,718,189 states left on queue.
+Progress(41) at 2024-11-06 21:52:59: 11,201,055,602 states generated (29,148,042 s/min), 975,106,131 distinct states found (2,181,699 ds/min), 174,874,035 states left on queue.
+Progress(41) at 2024-11-06 21:53:59: 11,230,576,268 states generated (29,520,666 s/min), 977,176,048 distinct states found (2,069,917 ds/min), 175,042,666 states left on queue.
+Progress(41) at 2024-11-06 21:54:59: 11,259,928,257 states generated (29,351,989 s/min), 979,337,351 distinct states found (2,161,303 ds/min), 175,248,665 states left on queue.
+Progress(41) at 2024-11-06 21:55:59: 11,289,190,366 states generated (29,262,109 s/min), 981,837,130 distinct states found (2,499,779 ds/min), 175,680,736 states left on queue.
+Progress(41) at 2024-11-06 21:56:59: 11,318,399,828 states generated (29,209,462 s/min), 984,112,195 distinct states found (2,275,065 ds/min), 175,913,580 states left on queue.
+Progress(41) at 2024-11-06 21:57:59: 11,347,862,845 states generated (29,463,017 s/min), 986,368,069 distinct states found (2,255,874 ds/min), 176,126,523 states left on queue.
+Progress(41) at 2024-11-06 21:58:59: 11,377,318,937 states generated (29,456,092 s/min), 988,548,686 distinct states found (2,180,617 ds/min), 176,253,552 states left on queue.
+Progress(41) at 2024-11-06 21:59:59: 11,406,551,913 states generated (29,232,976 s/min), 990,875,071 distinct states found (2,326,385 ds/min), 176,528,465 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 22:00:59)
+Progress(41) at 2024-11-06 22:00:59: 11,436,006,666 states generated (29,454,753 s/min), 993,234,999 distinct states found (2,359,928 ds/min), 176,816,755 states left on queue.
+Progress(41) at 2024-11-06 22:01:59: 11,465,207,151 states generated (29,200,485 s/min), 995,557,179 distinct states found (2,322,180 ds/min), 177,094,397 states left on queue.
+Progress(41) at 2024-11-06 22:02:59: 11,494,298,575 states generated (29,091,424 s/min), 997,927,812 distinct states found (2,370,633 ds/min), 177,411,890 states left on queue.
+Progress(41) at 2024-11-06 22:03:59: 11,523,576,632 states generated (29,278,057 s/min), 1,000,196,030 distinct states found (2,268,218 ds/min), 177,640,656 states left on queue.
+Progress(41) at 2024-11-06 22:04:59: 11,552,734,483 states generated (29,157,851 s/min), 1,002,452,277 distinct states found (2,256,247 ds/min), 177,827,247 states left on queue.
+Progress(41) at 2024-11-06 22:05:59: 11,582,200,298 states generated (29,465,815 s/min), 1,004,593,818 distinct states found (2,141,541 ds/min), 177,983,707 states left on queue.
+Progress(41) at 2024-11-06 22:06:59: 11,611,484,149 states generated (29,283,851 s/min), 1,006,774,383 distinct states found (2,180,565 ds/min), 178,161,577 states left on queue.
+Progress(41) at 2024-11-06 22:07:59: 11,640,449,232 states generated (28,965,083 s/min), 1,008,870,356 distinct states found (2,095,973 ds/min), 178,245,657 states left on queue.
+Progress(41) at 2024-11-06 22:08:59: 11,669,695,402 states generated (29,246,170 s/min), 1,010,743,262 distinct states found (1,872,906 ds/min), 178,199,630 states left on queue.
+Progress(41) at 2024-11-06 22:09:59: 11,698,855,657 states generated (29,160,255 s/min), 1,012,993,163 distinct states found (2,249,901 ds/min), 178,433,806 states left on queue.
+Progress(41) at 2024-11-06 22:10:59: 11,727,873,536 states generated (29,017,879 s/min), 1,015,222,628 distinct states found (2,229,465 ds/min), 178,645,315 states left on queue.
+Progress(41) at 2024-11-06 22:11:59: 11,756,910,696 states generated (29,037,160 s/min), 1,017,493,811 distinct states found (2,271,183 ds/min), 178,885,854 states left on queue.
+Progress(41) at 2024-11-06 22:12:59: 11,785,841,957 states generated (28,931,261 s/min), 1,019,798,730 distinct states found (2,304,919 ds/min), 179,138,831 states left on queue.
+Progress(41) at 2024-11-06 22:13:59: 11,814,627,351 states generated (28,785,394 s/min), 1,022,115,935 distinct states found (2,317,205 ds/min), 179,401,355 states left on queue.
+Progress(41) at 2024-11-06 22:14:59: 11,843,482,288 states generated (28,854,937 s/min), 1,024,372,991 distinct states found (2,257,056 ds/min), 179,570,167 states left on queue.
+Progress(41) at 2024-11-06 22:15:59: 11,872,232,503 states generated (28,750,215 s/min), 1,026,655,919 distinct states found (2,282,928 ds/min), 179,704,400 states left on queue.
+Progress(41) at 2024-11-06 22:16:59: 11,901,011,327 states generated (28,778,824 s/min), 1,028,780,151 distinct states found (2,124,232 ds/min), 179,744,822 states left on queue.
+Progress(41) at 2024-11-06 22:17:59: 11,930,078,061 states generated (29,066,734 s/min), 1,030,863,673 distinct states found (2,083,522 ds/min), 179,790,662 states left on queue.
+Progress(41) at 2024-11-06 22:18:59: 11,959,463,901 states generated (29,385,840 s/min), 1,032,840,344 distinct states found (1,976,671 ds/min), 179,738,442 states left on queue.
+Progress(41) at 2024-11-06 22:19:59: 11,988,811,132 states generated (29,347,231 s/min), 1,034,897,049 distinct states found (2,056,705 ds/min), 179,788,782 states left on queue.
+Progress(41) at 2024-11-06 22:20:59: 12,018,335,911 states generated (29,524,779 s/min), 1,037,158,579 distinct states found (2,261,530 ds/min), 179,978,226 states left on queue.
+Progress(41) at 2024-11-06 22:21:59: 12,047,755,593 states generated (29,419,682 s/min), 1,039,437,623 distinct states found (2,279,044 ds/min), 180,177,371 states left on queue.
+Progress(41) at 2024-11-06 22:22:59: 12,077,111,001 states generated (29,355,408 s/min), 1,041,672,961 distinct states found (2,235,338 ds/min), 180,336,777 states left on queue.
+Progress(41) at 2024-11-06 22:23:59: 12,106,556,177 states generated (29,445,176 s/min), 1,043,675,880 distinct states found (2,002,919 ds/min), 180,345,759 states left on queue.
+Progress(41) at 2024-11-06 22:24:59: 12,135,797,446 states generated (29,241,269 s/min), 1,045,966,606 distinct states found (2,290,726 ds/min), 180,552,887 states left on queue.
+Progress(41) at 2024-11-06 22:25:59: 12,165,143,756 states generated (29,346,310 s/min), 1,048,373,643 distinct states found (2,407,037 ds/min), 180,860,142 states left on queue.
+Progress(41) at 2024-11-06 22:26:59: 12,194,478,236 states generated (29,334,480 s/min), 1,050,403,560 distinct states found (2,029,917 ds/min), 180,873,811 states left on queue.
+Progress(41) at 2024-11-06 22:27:59: 12,223,653,080 states generated (29,174,844 s/min), 1,052,798,502 distinct states found (2,394,942 ds/min), 181,184,025 states left on queue.
+Progress(41) at 2024-11-06 22:28:59: 12,252,926,784 states generated (29,273,704 s/min), 1,055,243,990 distinct states found (2,445,488 ds/min), 181,542,525 states left on queue.
+Progress(41) at 2024-11-06 22:29:59: 12,282,176,071 states generated (29,249,287 s/min), 1,057,488,489 distinct states found (2,244,499 ds/min), 181,704,266 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 22:31:00)
+Progress(41) at 2024-11-06 22:31:00: 12,311,654,529 states generated (29,478,458 s/min), 1,059,789,296 distinct states found (2,300,807 ds/min), 181,875,392 states left on queue.
+Progress(41) at 2024-11-06 22:32:00: 12,340,837,903 states generated (29,183,374 s/min), 1,061,857,294 distinct states found (2,067,998 ds/min), 181,860,690 states left on queue.
+Progress(41) at 2024-11-06 22:33:00: 12,369,978,352 states generated (29,140,449 s/min), 1,063,943,173 distinct states found (2,085,879 ds/min), 181,951,091 states left on queue.
+Progress(41) at 2024-11-06 22:34:00: 12,398,820,660 states generated (28,842,308 s/min), 1,066,384,327 distinct states found (2,441,154 ds/min), 182,284,376 states left on queue.
+Progress(41) at 2024-11-06 22:35:00: 12,427,966,245 states generated (29,145,585 s/min), 1,068,376,116 distinct states found (1,991,789 ds/min), 182,275,982 states left on queue.
+Progress(41) at 2024-11-06 22:36:00: 12,457,300,671 states generated (29,334,426 s/min), 1,070,596,949 distinct states found (2,220,833 ds/min), 182,442,278 states left on queue.
+Progress(41) at 2024-11-06 22:37:00: 12,486,769,483 states generated (29,468,812 s/min), 1,072,968,640 distinct states found (2,371,691 ds/min), 182,718,485 states left on queue.
+Progress(41) at 2024-11-06 22:38:00: 12,516,031,360 states generated (29,261,877 s/min), 1,075,001,378 distinct states found (2,032,738 ds/min), 182,729,966 states left on queue.
+Progress(41) at 2024-11-06 22:39:00: 12,545,265,331 states generated (29,233,971 s/min), 1,076,880,794 distinct states found (1,879,416 ds/min), 182,634,798 states left on queue.
+Progress(41) at 2024-11-06 22:40:00: 12,574,495,559 states generated (29,230,228 s/min), 1,079,123,856 distinct states found (2,243,062 ds/min), 182,812,322 states left on queue.
+Progress(41) at 2024-11-06 22:41:00: 12,603,757,387 states generated (29,261,828 s/min), 1,081,610,769 distinct states found (2,486,913 ds/min), 183,219,247 states left on queue.
+Progress(41) at 2024-11-06 22:42:00: 12,632,909,026 states generated (29,151,639 s/min), 1,083,967,637 distinct states found (2,356,868 ds/min), 183,478,879 states left on queue.
+Progress(41) at 2024-11-06 22:43:00: 12,662,254,981 states generated (29,345,955 s/min), 1,086,272,935 distinct states found (2,305,298 ds/min), 183,726,701 states left on queue.
+Progress(41) at 2024-11-06 22:44:00: 12,691,400,218 states generated (29,145,237 s/min), 1,088,778,928 distinct states found (2,505,993 ds/min), 184,128,274 states left on queue.
+Progress(41) at 2024-11-06 22:45:00: 12,720,528,098 states generated (29,127,880 s/min), 1,091,335,929 distinct states found (2,557,001 ds/min), 184,556,078 states left on queue.
+Progress(41) at 2024-11-06 22:46:00: 12,749,701,886 states generated (29,173,788 s/min), 1,093,889,510 distinct states found (2,553,581 ds/min), 184,916,391 states left on queue.
+Progress(41) at 2024-11-06 22:47:00: 12,779,153,937 states generated (29,452,051 s/min), 1,096,185,973 distinct states found (2,296,463 ds/min), 185,115,877 states left on queue.
+Progress(41) at 2024-11-06 22:48:00: 12,808,440,971 states generated (29,287,034 s/min), 1,098,733,865 distinct states found (2,547,892 ds/min), 185,564,617 states left on queue.
+Progress(41) at 2024-11-06 22:49:00: 12,837,695,256 states generated (29,254,285 s/min), 1,100,705,460 distinct states found (1,971,595 ds/min), 185,532,558 states left on queue.
+Progress(41) at 2024-11-06 22:50:00: 12,866,801,129 states generated (29,105,873 s/min), 1,103,074,603 distinct states found (2,369,143 ds/min), 185,770,427 states left on queue.
+Progress(41) at 2024-11-06 22:51:00: 12,895,682,870 states generated (28,881,741 s/min), 1,105,437,747 distinct states found (2,363,144 ds/min), 186,049,274 states left on queue.
+Progress(41) at 2024-11-06 22:52:00: 12,924,655,990 states generated (28,973,120 s/min), 1,107,853,554 distinct states found (2,415,807 ds/min), 186,325,129 states left on queue.
+Progress(41) at 2024-11-06 22:53:00: 12,953,616,826 states generated (28,960,836 s/min), 1,110,097,321 distinct states found (2,243,767 ds/min), 186,509,276 states left on queue.
+Progress(41) at 2024-11-06 22:54:00: 12,982,711,068 states generated (29,094,242 s/min), 1,112,146,097 distinct states found (2,048,776 ds/min), 186,507,356 states left on queue.
+Progress(41) at 2024-11-06 22:55:00: 13,011,962,667 states generated (29,251,599 s/min), 1,114,530,785 distinct states found (2,384,688 ds/min), 186,758,016 states left on queue.
+Progress(41) at 2024-11-06 22:56:00: 13,041,163,382 states generated (29,200,715 s/min), 1,116,566,038 distinct states found (2,035,253 ds/min), 186,702,453 states left on queue.
+Progress(41) at 2024-11-06 22:57:00: 13,070,416,604 states generated (29,253,222 s/min), 1,118,433,735 distinct states found (1,867,697 ds/min), 186,595,926 states left on queue.
+Progress(41) at 2024-11-06 22:58:00: 13,099,393,765 states generated (28,977,161 s/min), 1,120,727,626 distinct states found (2,293,891 ds/min), 186,785,521 states left on queue.
+Progress(41) at 2024-11-06 22:59:00: 13,128,309,003 states generated (28,915,238 s/min), 1,123,075,278 distinct states found (2,347,652 ds/min), 186,977,496 states left on queue.
+Progress(42) at 2024-11-06 23:00:00: 13,157,492,254 states generated (29,183,251 s/min), 1,125,164,050 distinct states found (2,088,772 ds/min), 186,994,591 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 23:01:01)
+Progress(42) at 2024-11-06 23:01:01: 13,187,099,442 states generated (29,607,188 s/min), 1,126,955,828 distinct states found (1,791,778 ds/min), 186,860,457 states left on queue.
+Progress(42) at 2024-11-06 23:02:01: 13,216,408,249 states generated (29,308,807 s/min), 1,128,852,586 distinct states found (1,896,758 ds/min), 186,800,218 states left on queue.
+Progress(42) at 2024-11-06 23:03:01: 13,245,736,139 states generated (29,327,890 s/min), 1,130,960,381 distinct states found (2,107,795 ds/min), 186,911,118 states left on queue.
+Progress(42) at 2024-11-06 23:04:01: 13,274,893,464 states generated (29,157,325 s/min), 1,132,863,930 distinct states found (1,903,549 ds/min), 186,892,129 states left on queue.
+Progress(42) at 2024-11-06 23:05:01: 13,304,183,990 states generated (29,290,526 s/min), 1,134,876,306 distinct states found (2,012,376 ds/min), 186,965,153 states left on queue.
+Progress(42) at 2024-11-06 23:06:01: 13,333,457,770 states generated (29,273,780 s/min), 1,136,812,237 distinct states found (1,935,931 ds/min), 186,957,506 states left on queue.
+Progress(42) at 2024-11-06 23:07:01: 13,362,984,994 states generated (29,527,224 s/min), 1,138,649,876 distinct states found (1,837,639 ds/min), 186,823,887 states left on queue.
+Progress(42) at 2024-11-06 23:08:01: 13,392,550,733 states generated (29,565,739 s/min), 1,140,795,722 distinct states found (2,145,846 ds/min), 186,974,795 states left on queue.
+Progress(42) at 2024-11-06 23:09:01: 13,422,111,300 states generated (29,560,567 s/min), 1,143,038,611 distinct states found (2,242,889 ds/min), 187,179,197 states left on queue.
+Progress(42) at 2024-11-06 23:10:01: 13,451,822,496 states generated (29,711,196 s/min), 1,145,071,502 distinct states found (2,032,891 ds/min), 187,190,480 states left on queue.
+Progress(42) at 2024-11-06 23:11:01: 13,481,293,484 states generated (29,470,988 s/min), 1,146,905,806 distinct states found (1,834,304 ds/min), 187,079,661 states left on queue.
+Progress(42) at 2024-11-06 23:12:01: 13,510,659,679 states generated (29,366,195 s/min), 1,148,841,643 distinct states found (1,935,837 ds/min), 187,082,815 states left on queue.
+Progress(42) at 2024-11-06 23:13:01: 13,539,730,883 states generated (29,071,204 s/min), 1,150,715,436 distinct states found (1,873,793 ds/min), 187,013,975 states left on queue.
+Progress(42) at 2024-11-06 23:14:01: 13,568,973,308 states generated (29,242,425 s/min), 1,152,689,735 distinct states found (1,974,299 ds/min), 187,016,208 states left on queue.
+Progress(42) at 2024-11-06 23:15:01: 13,598,106,627 states generated (29,133,319 s/min), 1,154,829,869 distinct states found (2,140,134 ds/min), 187,147,884 states left on queue.
+Progress(42) at 2024-11-06 23:16:01: 13,627,319,459 states generated (29,212,832 s/min), 1,156,740,070 distinct states found (1,910,201 ds/min), 187,086,942 states left on queue.
+Progress(42) at 2024-11-06 23:17:01: 13,656,462,121 states generated (29,142,662 s/min), 1,158,698,307 distinct states found (1,958,237 ds/min), 187,072,201 states left on queue.
+Progress(42) at 2024-11-06 23:18:01: 13,685,545,941 states generated (29,083,820 s/min), 1,160,688,939 distinct states found (1,990,632 ds/min), 187,078,553 states left on queue.
+Progress(42) at 2024-11-06 23:19:01: 13,714,652,628 states generated (29,106,687 s/min), 1,162,748,633 distinct states found (2,059,694 ds/min), 187,157,229 states left on queue.
+Progress(42) at 2024-11-06 23:20:01: 13,744,105,986 states generated (29,453,358 s/min), 1,164,748,782 distinct states found (2,000,149 ds/min), 187,275,480 states left on queue.
+Progress(42) at 2024-11-06 23:21:01: 13,773,414,393 states generated (29,308,407 s/min), 1,166,804,740 distinct states found (2,055,958 ds/min), 187,393,312 states left on queue.
+Progress(42) at 2024-11-06 23:22:01: 13,802,600,069 states generated (29,185,676 s/min), 1,169,251,493 distinct states found (2,446,753 ds/min), 187,781,298 states left on queue.
+Progress(42) at 2024-11-06 23:23:01: 13,831,830,649 states generated (29,230,580 s/min), 1,171,412,176 distinct states found (2,160,683 ds/min), 187,932,991 states left on queue.
+Progress(42) at 2024-11-06 23:24:01: 13,861,152,221 states generated (29,321,572 s/min), 1,173,582,994 distinct states found (2,170,818 ds/min), 188,078,037 states left on queue.
+Progress(42) at 2024-11-06 23:25:01: 13,890,538,756 states generated (29,386,535 s/min), 1,175,642,901 distinct states found (2,059,907 ds/min), 188,116,794 states left on queue.
+Progress(42) at 2024-11-06 23:26:01: 13,919,812,820 states generated (29,274,064 s/min), 1,177,743,048 distinct states found (2,100,147 ds/min), 188,189,399 states left on queue.
+Progress(42) at 2024-11-06 23:27:01: 13,948,903,585 states generated (29,090,765 s/min), 1,179,980,470 distinct states found (2,237,422 ds/min), 188,388,309 states left on queue.
+Progress(42) at 2024-11-06 23:28:01: 13,978,138,385 states generated (29,234,800 s/min), 1,182,134,981 distinct states found (2,154,511 ds/min), 188,526,735 states left on queue.
+Progress(42) at 2024-11-06 23:29:01: 14,007,310,151 states generated (29,171,766 s/min), 1,184,360,360 distinct states found (2,225,379 ds/min), 188,718,575 states left on queue.
+Progress(42) at 2024-11-06 23:30:01: 14,036,411,110 states generated (29,100,959 s/min), 1,186,617,835 distinct states found (2,257,475 ds/min), 188,941,068 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-06 23:31:01)
+Progress(42) at 2024-11-06 23:31:01: 14,065,894,113 states generated (29,483,003 s/min), 1,188,743,048 distinct states found (2,125,213 ds/min), 189,035,636 states left on queue.
+Progress(42) at 2024-11-06 23:32:01: 14,094,909,096 states generated (29,014,983 s/min), 1,191,096,961 distinct states found (2,353,913 ds/min), 189,332,174 states left on queue.
+Progress(42) at 2024-11-06 23:33:01: 14,124,212,567 states generated (29,303,471 s/min), 1,193,012,997 distinct states found (1,916,036 ds/min), 189,266,016 states left on queue.
+Progress(42) at 2024-11-06 23:34:01: 14,153,428,768 states generated (29,216,201 s/min), 1,195,170,448 distinct states found (2,157,451 ds/min), 189,430,881 states left on queue.
+Progress(42) at 2024-11-06 23:35:01: 14,182,568,290 states generated (29,139,522 s/min), 1,197,127,126 distinct states found (1,956,678 ds/min), 189,423,769 states left on queue.
+Progress(42) at 2024-11-06 23:36:01: 14,211,602,024 states generated (29,033,734 s/min), 1,199,044,612 distinct states found (1,917,486 ds/min), 189,380,199 states left on queue.
+Progress(42) at 2024-11-06 23:37:01: 14,240,593,845 states generated (28,991,821 s/min), 1,200,900,028 distinct states found (1,855,416 ds/min), 189,324,925 states left on queue.
+Progress(42) at 2024-11-06 23:38:01: 14,269,687,808 states generated (29,093,963 s/min), 1,203,034,598 distinct states found (2,134,570 ds/min), 189,466,947 states left on queue.
+Progress(42) at 2024-11-06 23:39:01: 14,298,626,140 states generated (28,938,332 s/min), 1,205,190,806 distinct states found (2,156,208 ds/min), 189,608,794 states left on queue.
+Progress(42) at 2024-11-06 23:40:01: 14,327,587,116 states generated (28,960,976 s/min), 1,207,339,559 distinct states found (2,148,753 ds/min), 189,750,359 states left on queue.
+Progress(42) at 2024-11-06 23:41:01: 14,356,469,494 states generated (28,882,378 s/min), 1,209,518,146 distinct states found (2,178,587 ds/min), 189,892,036 states left on queue.
+Progress(42) at 2024-11-06 23:42:01: 14,385,314,696 states generated (28,845,202 s/min), 1,211,701,473 distinct states found (2,183,327 ds/min), 190,050,090 states left on queue.
+Progress(42) at 2024-11-06 23:43:01: 14,414,142,550 states generated (28,827,854 s/min), 1,213,859,919 distinct states found (2,158,446 ds/min), 190,161,804 states left on queue.
+Progress(42) at 2024-11-06 23:44:01: 14,442,945,644 states generated (28,803,094 s/min), 1,216,005,127 distinct states found (2,145,208 ds/min), 190,173,898 states left on queue.
+Progress(42) at 2024-11-06 23:45:01: 14,471,693,798 states generated (28,748,154 s/min), 1,218,030,292 distinct states found (2,025,165 ds/min), 190,127,864 states left on queue.
+Progress(42) at 2024-11-06 23:46:01: 14,500,599,025 states generated (28,905,227 s/min), 1,219,996,243 distinct states found (1,965,951 ds/min), 190,069,034 states left on queue.
+Progress(42) at 2024-11-06 23:47:01: 14,529,770,118 states generated (29,171,093 s/min), 1,221,890,284 distinct states found (1,894,041 ds/min), 189,948,701 states left on queue.
+Progress(42) at 2024-11-06 23:48:01: 14,559,044,399 states generated (29,274,281 s/min), 1,223,772,100 distinct states found (1,881,816 ds/min), 189,844,417 states left on queue.
+Progress(42) at 2024-11-06 23:49:01: 14,588,505,088 states generated (29,460,689 s/min), 1,225,870,790 distinct states found (2,098,690 ds/min), 189,921,025 states left on queue.
+Progress(42) at 2024-11-06 23:50:01: 14,618,007,797 states generated (29,502,709 s/min), 1,227,944,381 distinct states found (2,073,591 ds/min), 189,947,590 states left on queue.
+Progress(42) at 2024-11-06 23:51:01: 14,647,405,532 states generated (29,397,735 s/min), 1,230,287,712 distinct states found (2,343,331 ds/min), 190,200,223 states left on queue.
+Progress(42) at 2024-11-06 23:52:01: 14,676,733,478 states generated (29,327,946 s/min), 1,232,303,440 distinct states found (2,015,728 ds/min), 190,178,290 states left on queue.
+Progress(42) at 2024-11-06 23:53:01: 14,706,089,483 states generated (29,356,005 s/min), 1,234,269,055 distinct states found (1,965,615 ds/min), 190,175,215 states left on queue.
+Progress(42) at 2024-11-06 23:54:01: 14,735,226,809 states generated (29,137,326 s/min), 1,236,451,189 distinct states found (2,182,134 ds/min), 190,293,853 states left on queue.
+Progress(42) at 2024-11-06 23:55:01: 14,764,611,146 states generated (29,384,337 s/min), 1,238,780,557 distinct states found (2,329,368 ds/min), 190,528,991 states left on queue.
+Progress(42) at 2024-11-06 23:56:01: 14,793,911,038 states generated (29,299,892 s/min), 1,240,745,156 distinct states found (1,964,599 ds/min), 190,493,881 states left on queue.
+Progress(42) at 2024-11-06 23:57:01: 14,823,113,635 states generated (29,202,597 s/min), 1,242,984,781 distinct states found (2,239,625 ds/min), 190,675,723 states left on queue.
+Progress(42) at 2024-11-06 23:58:01: 14,852,208,056 states generated (29,094,421 s/min), 1,245,341,804 distinct states found (2,357,023 ds/min), 190,959,027 states left on queue.
+Progress(42) at 2024-11-06 23:59:01: 14,881,390,523 states generated (29,182,467 s/min), 1,247,530,823 distinct states found (2,189,019 ds/min), 191,085,175 states left on queue.
+Progress(42) at 2024-11-07 00:00:01: 14,910,709,837 states generated (29,319,314 s/min), 1,249,665,632 distinct states found (2,134,809 ds/min), 191,148,911 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 00:01:02)
+Progress(42) at 2024-11-07 00:01:02: 14,940,301,722 states generated (29,591,885 s/min), 1,251,820,098 distinct states found (2,154,466 ds/min), 191,164,099 states left on queue.
+Progress(42) at 2024-11-07 00:02:02: 14,969,468,946 states generated (29,167,224 s/min), 1,253,608,374 distinct states found (1,788,276 ds/min), 190,977,899 states left on queue.
+Progress(42) at 2024-11-07 00:03:02: 14,998,469,861 states generated (29,000,915 s/min), 1,255,846,206 distinct states found (2,237,832 ds/min), 191,179,932 states left on queue.
+Progress(42) at 2024-11-07 00:04:02: 15,027,424,344 states generated (28,954,483 s/min), 1,258,012,253 distinct states found (2,166,047 ds/min), 191,269,006 states left on queue.
+Progress(42) at 2024-11-07 00:05:02: 15,056,595,053 states generated (29,170,709 s/min), 1,259,974,817 distinct states found (1,962,564 ds/min), 191,232,379 states left on queue.
+Progress(42) at 2024-11-07 00:06:02: 15,085,857,792 states generated (29,262,739 s/min), 1,262,139,752 distinct states found (2,164,935 ds/min), 191,351,326 states left on queue.
+Progress(42) at 2024-11-07 00:07:02: 15,115,386,019 states generated (29,528,227 s/min), 1,264,425,723 distinct states found (2,285,971 ds/min), 191,549,077 states left on queue.
+Progress(42) at 2024-11-07 00:08:02: 15,144,705,784 states generated (29,319,765 s/min), 1,266,390,816 distinct states found (1,965,093 ds/min), 191,495,454 states left on queue.
+Progress(42) at 2024-11-07 00:09:02: 15,173,877,454 states generated (29,171,670 s/min), 1,268,144,487 distinct states found (1,753,671 ds/min), 191,300,959 states left on queue.
+Progress(42) at 2024-11-07 00:10:02: 15,203,080,845 states generated (29,203,391 s/min), 1,270,256,870 distinct states found (2,112,383 ds/min), 191,363,085 states left on queue.
+Progress(42) at 2024-11-07 00:11:02: 15,232,426,418 states generated (29,345,573 s/min), 1,272,624,413 distinct states found (2,367,543 ds/min), 191,673,032 states left on queue.
+Progress(42) at 2024-11-07 00:12:02: 15,261,677,209 states generated (29,250,791 s/min), 1,274,995,857 distinct states found (2,371,444 ds/min), 191,960,618 states left on queue.
+Progress(42) at 2024-11-07 00:13:02: 15,290,882,314 states generated (29,205,105 s/min), 1,277,269,501 distinct states found (2,273,644 ds/min), 192,155,220 states left on queue.
+Progress(42) at 2024-11-07 00:14:02: 15,320,166,816 states generated (29,284,502 s/min), 1,279,524,897 distinct states found (2,255,396 ds/min), 192,367,797 states left on queue.
+Progress(42) at 2024-11-07 00:15:02: 15,349,391,017 states generated (29,224,201 s/min), 1,281,912,896 distinct states found (2,387,999 ds/min), 192,657,361 states left on queue.
+Progress(42) at 2024-11-07 00:16:02: 15,378,510,873 states generated (29,119,856 s/min), 1,284,352,819 distinct states found (2,439,923 ds/min), 192,982,001 states left on queue.
+Progress(42) at 2024-11-07 00:17:02: 15,407,729,690 states generated (29,218,817 s/min), 1,286,798,116 distinct states found (2,445,297 ds/min), 193,251,888 states left on queue.
+Progress(42) at 2024-11-07 00:18:02: 15,437,122,682 states generated (29,392,992 s/min), 1,289,060,398 distinct states found (2,262,282 ds/min), 193,393,686 states left on queue.
+Progress(42) at 2024-11-07 00:19:02: 15,466,437,919 states generated (29,315,237 s/min), 1,291,390,007 distinct states found (2,329,609 ds/min), 193,674,611 states left on queue. +Progress(42) at 2024-11-07 00:20:02: 15,495,795,434 states generated (29,357,515 s/min), 1,293,625,999 distinct states found (2,235,992 ds/min), 193,855,148 states left on queue. +Progress(42) at 2024-11-07 00:21:02: 15,524,856,146 states generated (29,060,712 s/min), 1,295,675,220 distinct states found (2,049,221 ds/min), 193,858,347 states left on queue. +Progress(42) at 2024-11-07 00:22:02: 15,553,951,279 states generated (29,095,133 s/min), 1,297,806,219 distinct states found (2,130,999 ds/min), 193,910,330 states left on queue. +Progress(42) at 2024-11-07 00:23:02: 15,582,781,229 states generated (28,829,950 s/min), 1,300,215,254 distinct states found (2,409,035 ds/min), 194,211,020 states left on queue. +Progress(42) at 2024-11-07 00:24:02: 15,611,889,872 states generated (29,108,643 s/min), 1,302,431,347 distinct states found (2,216,093 ds/min), 194,324,070 states left on queue. +Progress(42) at 2024-11-07 00:25:02: 15,640,778,210 states generated (28,888,338 s/min), 1,304,674,839 distinct states found (2,243,492 ds/min), 194,483,563 states left on queue. +Progress(42) at 2024-11-07 00:26:02: 15,669,830,004 states generated (29,051,794 s/min), 1,306,661,103 distinct states found (1,986,264 ds/min), 194,429,101 states left on queue. +Progress(42) at 2024-11-07 00:27:02: 15,699,049,213 states generated (29,219,209 s/min), 1,308,920,712 distinct states found (2,259,609 ds/min), 194,577,576 states left on queue. +Progress(42) at 2024-11-07 00:28:02: 15,728,283,982 states generated (29,234,769 s/min), 1,310,924,780 distinct states found (2,004,068 ds/min), 194,488,601 states left on queue. +Progress(42) at 2024-11-07 00:29:02: 15,757,507,793 states generated (29,223,811 s/min), 1,312,729,390 distinct states found (1,804,610 ds/min), 194,321,454 states left on queue. +Progress(42) at 2024-11-07 00:30:02: 15,786,513,733 states generated (29,005,940 s/min), 1,314,926,573 distinct states found (2,197,183 ds/min), 194,422,995 states left on queue. +Checkpointing of run states/24-11-06-15-30-45.354 +Checkpointing completed at (2024-11-07 00:31:03) +Progress(42) at 2024-11-07 00:31:03: 15,815,683,048 states generated (29,169,315 s/min), 1,317,135,461 distinct states found (2,208,888 ds/min), 194,492,192 states left on queue. +Progress(42) at 2024-11-07 00:32:03: 15,844,758,678 states generated (29,075,630 s/min), 1,319,144,875 distinct states found (2,009,414 ds/min), 194,413,387 states left on queue. +Progress(42) at 2024-11-07 00:33:03: 15,873,998,157 states generated (29,239,479 s/min), 1,320,932,025 distinct states found (1,787,150 ds/min), 194,281,981 states left on queue. +Progress(42) at 2024-11-07 00:34:03: 15,903,205,479 states generated (29,207,322 s/min), 1,322,654,400 distinct states found (1,722,375 ds/min), 194,091,121 states left on queue. +Progress(42) at 2024-11-07 00:35:03: 15,932,501,264 states generated (29,295,785 s/min), 1,324,682,430 distinct states found (2,028,030 ds/min), 194,137,494 states left on queue. +Progress(42) at 2024-11-07 00:36:03: 15,961,589,919 states generated (29,088,655 s/min), 1,326,509,334 distinct states found (1,826,904 ds/min), 194,051,639 states left on queue. +Progress(42) at 2024-11-07 00:37:03: 15,990,668,327 states generated (29,078,408 s/min), 1,328,357,672 distinct states found (1,848,338 ds/min), 193,989,585 states left on queue. 
+Progress(42) at 2024-11-07 00:38:03: 16,019,782,313 states generated (29,113,986 s/min), 1,330,232,446 distinct states found (1,874,774 ds/min), 193,949,446 states left on queue. +Progress(42) at 2024-11-07 00:39:03: 16,049,252,200 states generated (29,469,887 s/min), 1,331,987,412 distinct states found (1,754,966 ds/min), 193,747,896 states left on queue. +Progress(42) at 2024-11-07 00:40:03: 16,078,692,514 states generated (29,440,314 s/min), 1,333,894,185 distinct states found (1,906,773 ds/min), 193,729,942 states left on queue. +Progress(42) at 2024-11-07 00:41:03: 16,108,160,136 states generated (29,467,622 s/min), 1,336,102,661 distinct states found (2,208,476 ds/min), 193,914,624 states left on queue. +Progress(42) at 2024-11-07 00:42:03: 16,137,813,382 states generated (29,653,246 s/min), 1,338,180,836 distinct states found (2,078,175 ds/min), 193,976,996 states left on queue. +Progress(43) at 2024-11-07 00:43:03: 16,167,357,885 states generated (29,544,503 s/min), 1,339,957,139 distinct states found (1,776,303 ds/min), 193,787,392 states left on queue. +Progress(43) at 2024-11-07 00:44:03: 16,196,650,450 states generated (29,292,565 s/min), 1,341,719,088 distinct states found (1,761,949 ds/min), 193,648,551 states left on queue. +Progress(43) at 2024-11-07 00:45:03: 16,225,735,286 states generated (29,084,836 s/min), 1,343,468,127 distinct states found (1,749,039 ds/min), 193,497,590 states left on queue. +Progress(43) at 2024-11-07 00:46:03: 16,254,805,612 states generated (29,070,326 s/min), 1,345,280,226 distinct states found (1,812,099 ds/min), 193,364,788 states left on queue. +Progress(43) at 2024-11-07 00:47:03: 16,283,933,423 states generated (29,127,811 s/min), 1,347,294,879 distinct states found (2,014,653 ds/min), 193,397,713 states left on queue. +Progress(43) at 2024-11-07 00:48:03: 16,312,911,730 states generated (28,978,307 s/min), 1,349,192,377 distinct states found (1,897,498 ds/min), 193,321,503 states left on queue. +Progress(43) at 2024-11-07 00:49:03: 16,342,115,657 states generated (29,203,927 s/min), 1,350,961,684 distinct states found (1,769,307 ds/min), 193,144,596 states left on queue. +Progress(43) at 2024-11-07 00:50:03: 16,370,988,391 states generated (28,872,734 s/min), 1,352,868,904 distinct states found (1,907,220 ds/min), 193,089,969 states left on queue. +Progress(43) at 2024-11-07 00:51:03: 16,400,089,208 states generated (29,100,817 s/min), 1,354,864,448 distinct states found (1,995,544 ds/min), 193,098,377 states left on queue. +Progress(43) at 2024-11-07 00:52:03: 16,429,331,456 states generated (29,242,248 s/min), 1,356,734,632 distinct states found (1,870,184 ds/min), 193,093,615 states left on queue. +Progress(43) at 2024-11-07 00:53:03: 16,458,648,761 states generated (29,317,305 s/min), 1,358,622,917 distinct states found (1,888,285 ds/min), 193,098,172 states left on queue. +Progress(43) at 2024-11-07 00:54:03: 16,487,874,773 states generated (29,226,012 s/min), 1,360,737,908 distinct states found (2,114,991 ds/min), 193,250,949 states left on queue. +Progress(43) at 2024-11-07 00:55:03: 16,517,101,401 states generated (29,226,628 s/min), 1,363,024,072 distinct states found (2,286,164 ds/min), 193,508,719 states left on queue. +Progress(43) at 2024-11-07 00:56:03: 16,546,231,362 states generated (29,129,961 s/min), 1,365,056,771 distinct states found (2,032,699 ds/min), 193,558,441 states left on queue. 
+Progress(43) at 2024-11-07 00:57:03: 16,575,532,837 states generated (29,301,475 s/min), 1,367,107,709 distinct states found (2,050,938 ds/min), 193,609,354 states left on queue. +Progress(43) at 2024-11-07 00:58:03: 16,604,872,137 states generated (29,339,300 s/min), 1,369,059,417 distinct states found (1,951,708 ds/min), 193,561,420 states left on queue. +Progress(43) at 2024-11-07 00:59:03: 16,634,070,732 states generated (29,198,595 s/min), 1,371,016,928 distinct states found (1,957,511 ds/min), 193,513,278 states left on queue. +Progress(43) at 2024-11-07 01:00:03: 16,663,158,113 states generated (29,087,381 s/min), 1,373,092,542 distinct states found (2,075,614 ds/min), 193,582,661 states left on queue. +Checkpointing of run states/24-11-06-15-30-45.354 +Checkpointing completed at (2024-11-07 01:01:03) +Progress(43) at 2024-11-07 01:01:03: 16,692,576,110 states generated (29,417,997 s/min), 1,375,200,108 distinct states found (2,107,566 ds/min), 193,664,621 states left on queue. +Progress(43) at 2024-11-07 01:02:03: 16,721,716,479 states generated (29,140,369 s/min), 1,377,247,529 distinct states found (2,047,421 ds/min), 193,708,538 states left on queue. +Progress(43) at 2024-11-07 01:03:03: 16,750,779,523 states generated (29,063,044 s/min), 1,379,368,087 distinct states found (2,120,558 ds/min), 193,813,065 states left on queue. +Progress(43) at 2024-11-07 01:04:03: 16,779,794,524 states generated (29,015,001 s/min), 1,381,371,287 distinct states found (2,003,200 ds/min), 193,825,465 states left on queue. +Progress(43) at 2024-11-07 01:05:03: 16,808,907,203 states generated (29,112,679 s/min), 1,383,515,008 distinct states found (2,143,721 ds/min), 193,953,821 states left on queue. +Progress(43) at 2024-11-07 01:06:03: 16,838,029,628 states generated (29,122,425 s/min), 1,385,629,882 distinct states found (2,114,874 ds/min), 194,038,163 states left on queue. +Progress(43) at 2024-11-07 01:07:03: 16,867,418,111 states generated (29,388,483 s/min), 1,387,561,049 distinct states found (1,931,167 ds/min), 194,004,058 states left on queue. +Progress(43) at 2024-11-07 01:08:03: 16,896,555,416 states generated (29,137,305 s/min), 1,389,592,238 distinct states found (2,031,189 ds/min), 194,058,208 states left on queue. +Progress(43) at 2024-11-07 01:09:03: 16,925,642,685 states generated (29,087,269 s/min), 1,391,404,896 distinct states found (1,812,658 ds/min), 193,924,951 states left on queue. +Progress(43) at 2024-11-07 01:10:03: 16,954,638,533 states generated (28,995,848 s/min), 1,393,186,525 distinct states found (1,781,629 ds/min), 193,784,358 states left on queue. +Progress(43) at 2024-11-07 01:11:03: 16,983,710,894 states generated (29,072,361 s/min), 1,395,018,264 distinct states found (1,831,739 ds/min), 193,697,690 states left on queue. +Progress(43) at 2024-11-07 01:12:03: 17,012,741,316 states generated (29,030,422 s/min), 1,397,039,325 distinct states found (2,021,061 ds/min), 193,755,919 states left on queue. +Progress(43) at 2024-11-07 01:13:03: 17,041,674,538 states generated (28,933,222 s/min), 1,399,086,352 distinct states found (2,047,027 ds/min), 193,799,420 states left on queue. +Progress(43) at 2024-11-07 01:14:03: 17,070,653,912 states generated (28,979,374 s/min), 1,401,092,312 distinct states found (2,005,960 ds/min), 193,820,018 states left on queue. +Progress(43) at 2024-11-07 01:15:03: 17,099,536,446 states generated (28,882,534 s/min), 1,403,159,743 distinct states found (2,067,431 ds/min), 193,867,947 states left on queue. 
+Progress(43) at 2024-11-07 01:16:03: 17,128,396,670 states generated (28,860,224 s/min), 1,405,244,280 distinct states found (2,084,537 ds/min), 193,945,380 states left on queue. +Progress(43) at 2024-11-07 01:17:03: 17,157,276,177 states generated (28,879,507 s/min), 1,407,274,748 distinct states found (2,030,468 ds/min), 193,944,077 states left on queue. +Progress(43) at 2024-11-07 01:18:03: 17,186,149,639 states generated (28,873,462 s/min), 1,409,283,088 distinct states found (2,008,340 ds/min), 193,881,792 states left on queue. +Progress(43) at 2024-11-07 01:19:03: 17,214,923,206 states generated (28,773,567 s/min), 1,411,167,065 distinct states found (1,883,977 ds/min), 193,711,394 states left on queue. +Progress(43) at 2024-11-07 01:20:03: 17,243,730,245 states generated (28,807,039 s/min), 1,413,023,763 distinct states found (1,856,698 ds/min), 193,546,054 states left on queue. +Progress(43) at 2024-11-07 01:21:03: 17,272,650,525 states generated (28,920,280 s/min), 1,414,802,171 distinct states found (1,778,408 ds/min), 193,345,308 states left on queue. +Progress(43) at 2024-11-07 01:22:03: 17,301,943,589 states generated (29,293,064 s/min), 1,416,599,440 distinct states found (1,797,269 ds/min), 193,158,676 states left on queue. +Progress(43) at 2024-11-07 01:23:03: 17,331,337,313 states generated (29,393,724 s/min), 1,418,547,450 distinct states found (1,948,010 ds/min), 193,112,883 states left on queue. +Progress(43) at 2024-11-07 01:24:03: 17,360,793,100 states generated (29,455,787 s/min), 1,420,576,018 distinct states found (2,028,568 ds/min), 193,100,476 states left on queue. +Progress(43) at 2024-11-07 01:25:03: 17,390,123,392 states generated (29,330,292 s/min), 1,422,693,479 distinct states found (2,117,461 ds/min), 193,171,748 states left on queue. +Progress(43) at 2024-11-07 01:26:03: 17,419,468,515 states generated (29,345,123 s/min), 1,424,783,244 distinct states found (2,089,765 ds/min), 193,228,274 states left on queue. +Progress(43) at 2024-11-07 01:27:03: 17,448,810,016 states generated (29,341,501 s/min), 1,426,560,811 distinct states found (1,777,567 ds/min), 193,036,459 states left on queue. +Progress(43) at 2024-11-07 01:28:03: 17,478,034,472 states generated (29,224,456 s/min), 1,428,663,374 distinct states found (2,102,563 ds/min), 193,125,616 states left on queue. +Progress(43) at 2024-11-07 01:29:03: 17,507,201,835 states generated (29,167,363 s/min), 1,430,735,910 distinct states found (2,072,536 ds/min), 193,146,850 states left on queue. +Progress(43) at 2024-11-07 01:30:03: 17,536,546,498 states generated (29,344,663 s/min), 1,432,877,950 distinct states found (2,142,040 ds/min), 193,230,645 states left on queue. +Checkpointing of run states/24-11-06-15-30-45.354 +Checkpointing completed at (2024-11-07 01:31:04) +Progress(43) at 2024-11-07 01:31:04: 17,566,061,546 states generated (29,515,048 s/min), 1,434,839,708 distinct states found (1,961,758 ds/min), 193,176,951 states left on queue. +Progress(43) at 2024-11-07 01:32:04: 17,595,015,993 states generated (28,954,447 s/min), 1,436,986,257 distinct states found (2,146,549 ds/min), 193,289,254 states left on queue. +Progress(43) at 2024-11-07 01:33:04: 17,624,137,153 states generated (29,121,160 s/min), 1,439,279,150 distinct states found (2,292,893 ds/min), 193,525,973 states left on queue. +Progress(43) at 2024-11-07 01:34:04: 17,653,328,248 states generated (29,191,095 s/min), 1,441,299,767 distinct states found (2,020,617 ds/min), 193,504,947 states left on queue. 
+Progress(43) at 2024-11-07 01:35:04: 17,682,562,562 states generated (29,234,314 s/min), 1,443,317,413 distinct states found (2,017,646 ds/min), 193,471,905 states left on queue. +Progress(43) at 2024-11-07 01:36:04: 17,711,829,397 states generated (29,266,835 s/min), 1,445,304,310 distinct states found (1,986,897 ds/min), 193,370,899 states left on queue. +Progress(43) at 2024-11-07 01:37:04: 17,740,910,347 states generated (29,080,950 s/min), 1,447,009,563 distinct states found (1,705,253 ds/min), 193,129,235 states left on queue. +Progress(43) at 2024-11-07 01:38:04: 17,769,836,321 states generated (28,925,974 s/min), 1,449,139,496 distinct states found (2,129,933 ds/min), 193,234,511 states left on queue. +Progress(43) at 2024-11-07 01:39:04: 17,798,713,067 states generated (28,876,746 s/min), 1,451,211,612 distinct states found (2,072,116 ds/min), 193,241,362 states left on queue. +Progress(43) at 2024-11-07 01:40:04: 17,827,794,691 states generated (29,081,624 s/min), 1,453,062,046 distinct states found (1,850,434 ds/min), 193,114,753 states left on queue. +Progress(43) at 2024-11-07 01:41:04: 17,856,974,014 states generated (29,179,323 s/min), 1,455,151,187 distinct states found (2,089,141 ds/min), 193,169,579 states left on queue. +Progress(43) at 2024-11-07 01:42:04: 17,886,446,666 states generated (29,472,652 s/min), 1,457,303,171 distinct states found (2,151,984 ds/min), 193,263,708 states left on queue. +Progress(43) at 2024-11-07 01:43:04: 17,915,744,840 states generated (29,298,174 s/min), 1,459,261,460 distinct states found (1,958,289 ds/min), 193,194,468 states left on queue. +Progress(43) at 2024-11-07 01:44:04: 17,944,793,057 states generated (29,048,217 s/min), 1,460,885,305 distinct states found (1,623,845 ds/min), 192,905,330 states left on queue. +Progress(43) at 2024-11-07 01:45:04: 17,973,952,967 states generated (29,159,910 s/min), 1,462,880,642 distinct states found (1,995,337 ds/min), 192,871,348 states left on queue. +Progress(43) at 2024-11-07 01:46:04: 18,003,158,344 states generated (29,205,377 s/min), 1,465,077,846 distinct states found (2,197,204 ds/min), 193,039,702 states left on queue. +Progress(43) at 2024-11-07 01:47:04: 18,032,464,087 states generated (29,305,743 s/min), 1,467,361,120 distinct states found (2,283,274 ds/min), 193,271,051 states left on queue. +Progress(43) at 2024-11-07 01:48:04: 18,061,597,682 states generated (29,133,595 s/min), 1,469,505,688 distinct states found (2,144,568 ds/min), 193,354,360 states left on queue. +Progress(43) at 2024-11-07 01:49:04: 18,090,888,515 states generated (29,290,833 s/min), 1,471,655,035 distinct states found (2,149,347 ds/min), 193,472,080 states left on queue. +Progress(43) at 2024-11-07 01:50:04: 18,119,855,749 states generated (28,967,234 s/min), 1,473,959,147 distinct states found (2,304,112 ds/min), 193,714,821 states left on queue. +Progress(43) at 2024-11-07 01:51:04: 18,149,035,954 states generated (29,180,205 s/min), 1,476,253,894 distinct states found (2,294,747 ds/min), 193,939,051 states left on queue. +Progress(43) at 2024-11-07 01:52:04: 18,178,210,402 states generated (29,174,448 s/min), 1,478,557,699 distinct states found (2,303,805 ds/min), 194,141,809 states left on queue. +Progress(43) at 2024-11-07 01:53:04: 18,207,377,534 states generated (29,167,132 s/min), 1,480,870,404 distinct states found (2,312,705 ds/min), 194,307,877 states left on queue. 
+Progress(43) at 2024-11-07 01:54:04: 18,236,577,989 states generated (29,200,455 s/min), 1,483,070,823 distinct states found (2,200,419 ds/min), 194,387,223 states left on queue. +Progress(43) at 2024-11-07 01:55:04: 18,265,859,163 states generated (29,281,174 s/min), 1,485,222,154 distinct states found (2,151,331 ds/min), 194,522,233 states left on queue. +Progress(43) at 2024-11-07 01:56:04: 18,295,148,797 states generated (29,289,634 s/min), 1,487,521,283 distinct states found (2,299,129 ds/min), 194,755,427 states left on queue. +Progress(43) at 2024-11-07 01:57:04: 18,324,289,175 states generated (29,140,378 s/min), 1,489,367,193 distinct states found (1,845,910 ds/min), 194,604,366 states left on queue. +Progress(43) at 2024-11-07 01:58:04: 18,353,385,770 states generated (29,096,595 s/min), 1,491,503,782 distinct states found (2,136,589 ds/min), 194,651,670 states left on queue. +Progress(43) at 2024-11-07 01:59:04: 18,382,277,307 states generated (28,891,537 s/min), 1,493,659,362 distinct states found (2,155,580 ds/min), 194,761,640 states left on queue. +Progress(43) at 2024-11-07 02:00:04: 18,411,146,853 states generated (28,869,546 s/min), 1,495,935,237 distinct states found (2,275,875 ds/min), 194,908,896 states left on queue. +Checkpointing of run states/24-11-06-15-30-45.354 +Checkpointing completed at (2024-11-07 02:01:05) +Progress(43) at 2024-11-07 02:01:05: 18,440,532,837 states generated (29,385,984 s/min), 1,497,937,249 distinct states found (2,002,012 ds/min), 194,874,096 states left on queue. +Progress(43) at 2024-11-07 02:02:05: 18,469,385,511 states generated (28,852,674 s/min), 1,500,062,084 distinct states found (2,124,835 ds/min), 194,900,930 states left on queue. +Progress(43) at 2024-11-07 02:03:05: 18,498,509,160 states generated (29,123,649 s/min), 1,502,077,387 distinct states found (2,015,303 ds/min), 194,871,250 states left on queue. +Progress(43) at 2024-11-07 02:04:05: 18,527,694,520 states generated (29,185,360 s/min), 1,504,242,136 distinct states found (2,164,749 ds/min), 194,924,748 states left on queue. +Progress(43) at 2024-11-07 02:05:05: 18,556,901,350 states generated (29,206,830 s/min), 1,506,034,687 distinct states found (1,792,551 ds/min), 194,670,609 states left on queue. +Progress(43) at 2024-11-07 02:06:05: 18,586,004,706 states generated (29,103,356 s/min), 1,507,879,191 distinct states found (1,844,504 ds/min), 194,551,782 states left on queue. +Progress(43) at 2024-11-07 02:07:05: 18,614,881,319 states generated (28,876,613 s/min), 1,510,019,997 distinct states found (2,140,806 ds/min), 194,594,957 states left on queue. +Progress(43) at 2024-11-07 02:08:05: 18,643,854,322 states generated (28,973,003 s/min), 1,512,074,165 distinct states found (2,054,168 ds/min), 194,532,832 states left on queue. +Progress(43) at 2024-11-07 02:09:05: 18,672,998,550 states generated (29,144,228 s/min), 1,513,943,120 distinct states found (1,868,955 ds/min), 194,368,599 states left on queue. +Progress(43) at 2024-11-07 02:10:05: 18,702,201,308 states generated (29,202,758 s/min), 1,515,546,068 distinct states found (1,602,948 ds/min), 194,090,755 states left on queue. +Progress(43) at 2024-11-07 02:11:05: 18,731,481,011 states generated (29,279,703 s/min), 1,517,343,788 distinct states found (1,797,720 ds/min), 193,942,961 states left on queue. +Progress(43) at 2024-11-07 02:12:05: 18,760,609,986 states generated (29,128,975 s/min), 1,519,160,050 distinct states found (1,816,262 ds/min), 193,815,502 states left on queue. 
+Progress(43) at 2024-11-07 02:13:05: 18,789,628,202 states generated (29,018,216 s/min), 1,520,860,123 distinct states found (1,700,073 ds/min), 193,642,399 states left on queue. +Progress(43) at 2024-11-07 02:14:05: 18,818,770,407 states generated (29,142,205 s/min), 1,522,616,126 distinct states found (1,756,003 ds/min), 193,516,180 states left on queue. +Progress(43) at 2024-11-07 02:15:05: 18,847,943,521 states generated (29,173,114 s/min), 1,524,373,878 distinct states found (1,757,752 ds/min), 193,352,389 states left on queue. +Progress(43) at 2024-11-07 02:16:05: 18,877,338,814 states generated (29,395,293 s/min), 1,526,022,199 distinct states found (1,648,321 ds/min), 193,099,089 states left on queue. +Progress(43) at 2024-11-07 02:17:05: 18,906,854,907 states generated (29,516,093 s/min), 1,528,057,287 distinct states found (2,035,088 ds/min), 193,164,007 states left on queue. +Progress(43) at 2024-11-07 02:18:05: 18,936,272,714 states generated (29,417,807 s/min), 1,530,070,868 distinct states found (2,013,581 ds/min), 193,195,191 states left on queue. +Progress(43) at 2024-11-07 02:19:05: 18,965,845,291 states generated (29,572,577 s/min), 1,531,953,514 distinct states found (1,882,646 ds/min), 193,094,610 states left on queue. +Progress(44) at 2024-11-07 02:20:05: 18,995,225,711 states generated (29,380,420 s/min), 1,533,586,486 distinct states found (1,632,972 ds/min), 192,813,292 states left on queue. +Progress(44) at 2024-11-07 02:21:05: 19,024,424,249 states generated (29,198,538 s/min), 1,535,341,846 distinct states found (1,755,360 ds/min), 192,665,431 states left on queue. +Progress(44) at 2024-11-07 02:22:05: 19,053,319,611 states generated (28,895,362 s/min), 1,536,913,652 distinct states found (1,571,806 ds/min), 192,336,687 states left on queue. +Progress(44) at 2024-11-07 02:23:05: 19,082,456,366 states generated (29,136,755 s/min), 1,538,781,638 distinct states found (1,867,986 ds/min), 192,258,068 states left on queue. +Progress(44) at 2024-11-07 02:24:05: 19,111,445,941 states generated (28,989,575 s/min), 1,540,696,734 distinct states found (1,915,096 ds/min), 192,193,602 states left on queue. +Progress(44) at 2024-11-07 02:25:05: 19,140,498,683 states generated (29,052,742 s/min), 1,542,368,994 distinct states found (1,672,260 ds/min), 191,938,239 states left on queue. +Progress(44) at 2024-11-07 02:26:05: 19,169,386,645 states generated (28,887,962 s/min), 1,544,099,236 distinct states found (1,730,242 ds/min), 191,741,059 states left on queue. +Progress(44) at 2024-11-07 02:27:05: 19,198,354,957 states generated (28,968,312 s/min), 1,545,891,836 distinct states found (1,792,600 ds/min), 191,577,211 states left on queue. +Progress(44) at 2024-11-07 02:28:05: 19,227,551,398 states generated (29,196,441 s/min), 1,547,751,807 distinct states found (1,859,971 ds/min), 191,530,291 states left on queue. +Progress(44) at 2024-11-07 02:29:05: 19,256,905,544 states generated (29,354,146 s/min), 1,549,562,753 distinct states found (1,810,946 ds/min), 191,492,536 states left on queue. +Progress(44) at 2024-11-07 02:30:05: 19,286,043,009 states generated (29,137,465 s/min), 1,551,387,062 distinct states found (1,824,309 ds/min), 191,432,131 states left on queue. +Checkpointing of run states/24-11-06-15-30-45.354 +Checkpointing completed at (2024-11-07 02:31:05) +Progress(44) at 2024-11-07 02:31:05: 19,315,478,636 states generated (29,435,627 s/min), 1,553,684,416 distinct states found (2,297,354 ds/min), 191,685,387 states left on queue. 
+Progress(44) at 2024-11-07 02:32:05: 19,344,574,433 states generated (29,095,797 s/min), 1,555,642,251 distinct states found (1,957,835 ds/min), 191,687,380 states left on queue. +Progress(44) at 2024-11-07 02:33:05: 19,373,560,321 states generated (28,985,888 s/min), 1,557,576,032 distinct states found (1,933,781 ds/min), 191,644,771 states left on queue. +Progress(44) at 2024-11-07 02:34:05: 19,402,882,849 states generated (29,322,528 s/min), 1,559,483,211 distinct states found (1,907,179 ds/min), 191,584,351 states left on queue. +Progress(44) at 2024-11-07 02:35:05: 19,432,084,827 states generated (29,201,978 s/min), 1,561,305,888 distinct states found (1,822,677 ds/min), 191,432,596 states left on queue. +Progress(44) at 2024-11-07 02:36:05: 19,461,112,335 states generated (29,027,508 s/min), 1,563,180,797 distinct states found (1,874,909 ds/min), 191,330,553 states left on queue. +Progress(44) at 2024-11-07 02:37:05: 19,490,043,498 states generated (28,931,163 s/min), 1,565,137,368 distinct states found (1,956,571 ds/min), 191,292,333 states left on queue. +Progress(44) at 2024-11-07 02:38:05: 19,519,153,014 states generated (29,109,516 s/min), 1,567,034,954 distinct states found (1,897,586 ds/min), 191,229,524 states left on queue. +Progress(44) at 2024-11-07 02:39:05: 19,548,204,678 states generated (29,051,664 s/min), 1,568,989,443 distinct states found (1,954,489 ds/min), 191,191,752 states left on queue. +Progress(44) at 2024-11-07 02:40:05: 19,577,227,470 states generated (29,022,792 s/min), 1,570,981,495 distinct states found (1,992,052 ds/min), 191,200,024 states left on queue. +Progress(44) at 2024-11-07 02:41:05: 19,606,172,601 states generated (28,945,131 s/min), 1,572,870,324 distinct states found (1,888,829 ds/min), 191,115,956 states left on queue. +Progress(44) at 2024-11-07 02:42:05: 19,635,167,481 states generated (28,994,880 s/min), 1,574,894,468 distinct states found (2,024,144 ds/min), 191,139,869 states left on queue. +Progress(44) at 2024-11-07 02:43:05: 19,664,339,049 states generated (29,171,568 s/min), 1,576,906,348 distinct states found (2,011,880 ds/min), 191,137,521 states left on queue. +Progress(44) at 2024-11-07 02:44:05: 19,693,639,689 states generated (29,300,640 s/min), 1,578,748,425 distinct states found (1,842,077 ds/min), 191,040,518 states left on queue. +Progress(44) at 2024-11-07 02:45:05: 19,722,704,536 states generated (29,064,847 s/min), 1,580,671,538 distinct states found (1,923,113 ds/min), 191,001,469 states left on queue. +Progress(44) at 2024-11-07 02:46:05: 19,751,627,669 states generated (28,923,133 s/min), 1,582,340,762 distinct states found (1,669,224 ds/min), 190,750,504 states left on queue. +Progress(44) at 2024-11-07 02:47:05: 19,780,532,535 states generated (28,904,866 s/min), 1,583,965,049 distinct states found (1,624,287 ds/min), 190,492,540 states left on queue. +Progress(44) at 2024-11-07 02:48:05: 19,809,548,743 states generated (29,016,208 s/min), 1,585,820,774 distinct states found (1,855,725 ds/min), 190,422,454 states left on queue. +Progress(44) at 2024-11-07 02:49:05: 19,838,541,075 states generated (28,992,332 s/min), 1,587,731,649 distinct states found (1,910,875 ds/min), 190,386,932 states left on queue. +Progress(44) at 2024-11-07 02:50:05: 19,867,458,320 states generated (28,917,245 s/min), 1,589,622,141 distinct states found (1,890,492 ds/min), 190,310,460 states left on queue. 
+Progress(44) at 2024-11-07 02:51:05: 19,896,287,158 states generated (28,828,838 s/min), 1,591,517,151 distinct states found (1,895,010 ds/min), 190,235,561 states left on queue. +Progress(44) at 2024-11-07 02:52:05: 19,925,117,820 states generated (28,830,662 s/min), 1,593,453,289 distinct states found (1,936,138 ds/min), 190,176,789 states left on queue. +Progress(44) at 2024-11-07 02:53:05: 19,953,949,651 states generated (28,831,831 s/min), 1,595,392,832 distinct states found (1,939,543 ds/min), 190,137,713 states left on queue. +Progress(44) at 2024-11-07 02:54:05: 19,982,791,590 states generated (28,841,939 s/min), 1,597,295,182 distinct states found (1,902,350 ds/min), 190,030,864 states left on queue. +Progress(44) at 2024-11-07 02:55:05: 20,011,631,796 states generated (28,840,206 s/min), 1,599,162,388 distinct states found (1,867,206 ds/min), 189,857,155 states left on queue. +Progress(44) at 2024-11-07 02:56:05: 20,040,350,017 states generated (28,718,221 s/min), 1,600,882,747 distinct states found (1,720,359 ds/min), 189,556,504 states left on queue. +Progress(44) at 2024-11-07 02:57:05: 20,069,048,267 states generated (28,698,250 s/min), 1,602,583,945 distinct states found (1,701,198 ds/min), 189,276,085 states left on queue. +Progress(44) at 2024-11-07 02:58:05: 20,098,037,079 states generated (28,988,812 s/min), 1,604,245,937 distinct states found (1,661,992 ds/min), 188,968,070 states left on queue. +Progress(44) at 2024-11-07 02:59:05: 20,127,216,730 states generated (29,179,651 s/min), 1,605,916,753 distinct states found (1,670,816 ds/min), 188,703,437 states left on queue. +Progress(44) at 2024-11-07 03:00:05: 20,156,712,917 states generated (29,496,187 s/min), 1,607,868,866 distinct states found (1,952,113 ds/min), 188,640,553 states left on queue. +Checkpointing of run states/24-11-06-15-30-45.354 +Checkpointing completed at (2024-11-07 03:01:06) +Progress(44) at 2024-11-07 03:01:06: 20,186,396,044 states generated (29,683,127 s/min), 1,609,765,772 distinct states found (1,896,906 ds/min), 188,510,848 states left on queue. +Progress(44) at 2024-11-07 03:02:06: 20,215,754,864 states generated (29,358,820 s/min), 1,611,924,139 distinct states found (2,158,367 ds/min), 188,607,723 states left on queue. +Progress(44) at 2024-11-07 03:03:06: 20,245,041,982 states generated (29,287,118 s/min), 1,613,794,702 distinct states found (1,870,563 ds/min), 188,472,700 states left on queue. +Progress(44) at 2024-11-07 03:04:06: 20,274,294,374 states generated (29,252,392 s/min), 1,615,566,733 distinct states found (1,772,031 ds/min), 188,311,061 states left on queue. +Progress(44) at 2024-11-07 03:05:06: 20,303,317,537 states generated (29,023,163 s/min), 1,617,541,966 distinct states found (1,975,233 ds/min), 188,275,227 states left on queue. +Progress(44) at 2024-11-07 03:06:06: 20,332,555,917 states generated (29,238,380 s/min), 1,619,626,477 distinct states found (2,084,511 ds/min), 188,311,129 states left on queue. +Progress(44) at 2024-11-07 03:07:06: 20,361,814,948 states generated (29,259,031 s/min), 1,621,498,944 distinct states found (1,872,467 ds/min), 188,187,982 states left on queue. +Progress(44) at 2024-11-07 03:08:06: 20,391,066,062 states generated (29,251,114 s/min), 1,623,499,145 distinct states found (2,000,201 ds/min), 188,184,372 states left on queue. +Progress(44) at 2024-11-07 03:09:06: 20,420,013,539 states generated (28,947,477 s/min), 1,625,534,256 distinct states found (2,035,111 ds/min), 188,202,174 states left on queue. 
+Progress(44) at 2024-11-07 03:10:06: 20,449,116,787 states generated (29,103,248 s/min), 1,627,670,135 distinct states found (2,135,879 ds/min), 188,303,061 states left on queue. +Progress(44) at 2024-11-07 03:11:06: 20,478,265,224 states generated (29,148,437 s/min), 1,629,558,947 distinct states found (1,888,812 ds/min), 188,171,995 states left on queue. +Progress(44) at 2024-11-07 03:12:06: 20,507,459,785 states generated (29,194,561 s/min), 1,631,460,915 distinct states found (1,901,968 ds/min), 188,044,516 states left on queue. +Progress(44) at 2024-11-07 03:13:06: 20,536,655,025 states generated (29,195,240 s/min), 1,633,292,515 distinct states found (1,831,600 ds/min), 187,823,678 states left on queue. +Progress(44) at 2024-11-07 03:14:06: 20,565,699,198 states generated (29,044,173 s/min), 1,634,967,122 distinct states found (1,674,607 ds/min), 187,564,357 states left on queue. +Progress(44) at 2024-11-07 03:15:06: 20,594,568,781 states generated (28,869,583 s/min), 1,636,996,440 distinct states found (2,029,318 ds/min), 187,577,506 states left on queue. +Progress(44) at 2024-11-07 03:16:06: 20,623,463,526 states generated (28,894,745 s/min), 1,638,870,718 distinct states found (1,874,278 ds/min), 187,429,057 states left on queue. +Progress(44) at 2024-11-07 03:17:06: 20,652,517,975 states generated (29,054,449 s/min), 1,640,608,054 distinct states found (1,737,336 ds/min), 187,198,996 states left on queue. +Progress(44) at 2024-11-07 03:18:06: 20,681,729,377 states generated (29,211,402 s/min), 1,642,682,611 distinct states found (2,074,557 ds/min), 187,238,673 states left on queue. +Progress(44) at 2024-11-07 03:19:06: 20,711,226,363 states generated (29,496,986 s/min), 1,644,764,480 distinct states found (2,081,869 ds/min), 187,269,746 states left on queue. +Progress(44) at 2024-11-07 03:20:06: 20,740,520,876 states generated (29,294,513 s/min), 1,646,565,948 distinct states found (1,801,468 ds/min), 187,085,841 states left on queue. +Progress(44) at 2024-11-07 03:21:06: 20,769,532,066 states generated (29,011,190 s/min), 1,648,139,570 distinct states found (1,573,622 ds/min), 186,737,971 states left on queue. +Progress(44) at 2024-11-07 03:22:06: 20,798,731,555 states generated (29,199,489 s/min), 1,650,061,318 distinct states found (1,921,748 ds/min), 186,652,080 states left on queue. +Progress(44) at 2024-11-07 03:23:06: 20,827,864,871 states generated (29,133,316 s/min), 1,652,217,368 distinct states found (2,156,050 ds/min), 186,786,338 states left on queue. +Progress(44) at 2024-11-07 03:24:06: 20,857,114,542 states generated (29,249,671 s/min), 1,654,404,059 distinct states found (2,186,691 ds/min), 186,937,127 states left on queue. +Progress(44) at 2024-11-07 03:25:06: 20,886,216,235 states generated (29,101,693 s/min), 1,656,424,687 distinct states found (2,020,628 ds/min), 186,925,384 states left on queue. +Progress(44) at 2024-11-07 03:26:06: 20,915,415,138 states generated (29,198,903 s/min), 1,658,503,968 distinct states found (2,079,281 ds/min), 186,988,200 states left on queue. +Progress(44) at 2024-11-07 03:27:06: 20,944,436,117 states generated (29,020,979 s/min), 1,660,708,925 distinct states found (2,204,957 ds/min), 187,151,771 states left on queue. +Progress(44) at 2024-11-07 03:28:06: 20,973,637,986 states generated (29,201,869 s/min), 1,662,812,161 distinct states found (2,103,236 ds/min), 187,208,363 states left on queue. 
+Progress(44) at 2024-11-07 03:29:06: 21,002,664,654 states generated (29,026,668 s/min), 1,665,077,078 distinct states found (2,264,917 ds/min), 187,398,168 states left on queue. +Progress(44) at 2024-11-07 03:30:06: 21,031,900,683 states generated (29,236,029 s/min), 1,667,241,517 distinct states found (2,164,439 ds/min), 187,444,342 states left on queue. +Checkpointing of run states/24-11-06-15-30-45.354 +Checkpointing completed at (2024-11-07 03:31:07) +Progress(44) at 2024-11-07 03:31:07: 21,061,190,967 states generated (29,290,284 s/min), 1,669,337,346 distinct states found (2,095,829 ds/min), 187,431,388 states left on queue. +Progress(44) at 2024-11-07 03:32:07: 21,090,368,622 states generated (29,177,655 s/min), 1,671,292,120 distinct states found (1,954,774 ds/min), 187,370,395 states left on queue. +Progress(44) at 2024-11-07 03:33:07: 21,119,546,588 states generated (29,177,966 s/min), 1,673,505,061 distinct states found (2,212,941 ds/min), 187,548,275 states left on queue. +Progress(44) at 2024-11-07 03:34:07: 21,148,770,544 states generated (29,223,956 s/min), 1,675,679,331 distinct states found (2,174,270 ds/min), 187,681,477 states left on queue. +Progress(44) at 2024-11-07 03:35:07: 21,177,752,842 states generated (28,982,298 s/min), 1,677,512,003 distinct states found (1,832,672 ds/min), 187,502,663 states left on queue. +Progress(44) at 2024-11-07 03:36:07: 21,206,777,989 states generated (29,025,147 s/min), 1,679,391,120 distinct states found (1,879,117 ds/min), 187,345,876 states left on queue. +Progress(44) at 2024-11-07 03:37:07: 21,235,634,562 states generated (28,856,573 s/min), 1,681,551,461 distinct states found (2,160,341 ds/min), 187,464,887 states left on queue. +Progress(44) at 2024-11-07 03:38:07: 21,264,448,690 states generated (28,814,128 s/min), 1,683,690,836 distinct states found (2,139,375 ds/min), 187,491,922 states left on queue. +Progress(44) at 2024-11-07 03:39:07: 21,293,469,454 states generated (29,020,764 s/min), 1,685,615,643 distinct states found (1,924,807 ds/min), 187,416,199 states left on queue. +Progress(44) at 2024-11-07 03:40:07: 21,322,287,082 states generated (28,817,628 s/min), 1,687,574,723 distinct states found (1,959,080 ds/min), 187,310,981 states left on queue. +Progress(44) at 2024-11-07 03:41:07: 21,351,396,680 states generated (29,109,598 s/min), 1,689,546,445 distinct states found (1,971,722 ds/min), 187,236,923 states left on queue. +Progress(44) at 2024-11-07 03:42:07: 21,380,557,165 states generated (29,160,485 s/min), 1,691,587,169 distinct states found (2,040,724 ds/min), 187,186,480 states left on queue. +Progress(44) at 2024-11-07 03:43:07: 21,409,627,333 states generated (29,070,168 s/min), 1,693,246,645 distinct states found (1,659,476 ds/min), 186,839,410 states left on queue. +Progress(44) at 2024-11-07 03:44:07: 21,438,692,500 states generated (29,065,167 s/min), 1,695,162,088 distinct states found (1,915,443 ds/min), 186,763,843 states left on queue. +Progress(44) at 2024-11-07 03:45:07: 21,467,558,980 states generated (28,866,480 s/min), 1,697,105,328 distinct states found (1,943,240 ds/min), 186,647,091 states left on queue. +Progress(44) at 2024-11-07 03:46:07: 21,496,459,596 states generated (28,900,616 s/min), 1,698,987,134 distinct states found (1,881,806 ds/min), 186,428,411 states left on queue. +Progress(44) at 2024-11-07 03:47:07: 21,525,539,564 states generated (29,079,968 s/min), 1,700,685,335 distinct states found (1,698,201 ds/min), 186,176,831 states left on queue. 
+Progress(44) at 2024-11-07 03:48:07: 21,554,716,115 states generated (29,176,551 s/min), 1,702,193,633 distinct states found (1,508,298 ds/min), 185,811,852 states left on queue. +Progress(44) at 2024-11-07 03:49:07: 21,583,930,332 states generated (29,214,217 s/min), 1,703,965,186 distinct states found (1,771,553 ds/min), 185,645,122 states left on queue. +Progress(44) at 2024-11-07 03:50:07: 21,612,870,304 states generated (28,939,972 s/min), 1,705,581,017 distinct states found (1,615,831 ds/min), 185,385,482 states left on queue. +Progress(44) at 2024-11-07 03:51:07: 21,641,828,993 states generated (28,958,689 s/min), 1,707,209,695 distinct states found (1,628,678 ds/min), 185,147,878 states left on queue. +Progress(44) at 2024-11-07 03:52:07: 21,670,879,227 states generated (29,050,234 s/min), 1,708,891,056 distinct states found (1,681,361 ds/min), 184,967,950 states left on queue. +Progress(44) at 2024-11-07 03:53:07: 21,700,175,853 states generated (29,296,626 s/min), 1,710,442,845 distinct states found (1,551,789 ds/min), 184,628,950 states left on queue. +Progress(44) at 2024-11-07 03:54:07: 21,729,661,920 states generated (29,486,067 s/min), 1,712,360,375 distinct states found (1,917,530 ds/min), 184,602,047 states left on queue. +Progress(44) at 2024-11-07 03:55:07: 21,759,015,470 states generated (29,353,550 s/min), 1,714,259,170 distinct states found (1,898,795 ds/min), 184,554,564 states left on queue. +Progress(44) at 2024-11-07 03:56:07: 21,788,534,088 states generated (29,518,618 s/min), 1,716,081,999 distinct states found (1,822,829 ds/min), 184,406,994 states left on queue. +Progress(44) at 2024-11-07 03:57:07: 21,817,875,474 states generated (29,341,386 s/min), 1,717,634,611 distinct states found (1,552,612 ds/min), 184,057,660 states left on queue. +Progress(44) at 2024-11-07 03:58:07: 21,847,006,510 states generated (29,131,036 s/min), 1,719,299,741 distinct states found (1,665,130 ds/min), 183,828,258 states left on queue. +Progress(44) at 2024-11-07 03:59:07: 21,875,869,357 states generated (28,862,847 s/min), 1,720,801,722 distinct states found (1,501,981 ds/min), 183,443,083 states left on queue. +Progress(44) at 2024-11-07 04:00:07: 21,904,922,732 states generated (29,053,375 s/min), 1,722,588,504 distinct states found (1,786,782 ds/min), 183,289,094 states left on queue. +Checkpointing of run states/24-11-06-15-30-45.354 +Checkpointing completed at (2024-11-07 04:01:07) +Progress(44) at 2024-11-07 04:01:07: 21,933,965,695 states generated (29,042,963 s/min), 1,724,285,279 distinct states found (1,696,775 ds/min), 183,029,310 states left on queue. +Progress(44) at 2024-11-07 04:02:07: 21,962,959,341 states generated (28,993,646 s/min), 1,725,868,213 distinct states found (1,582,934 ds/min), 182,699,155 states left on queue. +Progress(44) at 2024-11-07 04:03:07: 21,991,777,816 states generated (28,818,475 s/min), 1,727,519,032 distinct states found (1,650,819 ds/min), 182,433,701 states left on queue. +Progress(44) at 2024-11-07 04:04:07: 22,020,733,433 states generated (28,955,617 s/min), 1,729,219,503 distinct states found (1,700,471 ds/min), 182,216,615 states left on queue. +Progress(44) at 2024-11-07 04:05:07: 22,049,984,634 states generated (29,251,201 s/min), 1,730,967,606 distinct states found (1,748,103 ds/min), 182,140,987 states left on queue. +Progress(44) at 2024-11-07 04:06:07: 22,079,112,674 states generated (29,128,040 s/min), 1,732,648,368 distinct states found (1,680,762 ds/min), 181,963,576 states left on queue. 
+Progress(44) at 2024-11-07 04:07:07: 22,108,329,917 states generated (29,217,243 s/min), 1,734,711,160 distinct states found (2,062,792 ds/min), 182,074,434 states left on queue. +Progress(44) at 2024-11-07 04:08:07: 22,137,402,322 states generated (29,072,405 s/min), 1,736,773,111 distinct states found (2,061,951 ds/min), 182,163,318 states left on queue. +Progress(44) at 2024-11-07 04:09:07: 22,166,402,243 states generated (28,999,921 s/min), 1,738,573,615 distinct states found (1,800,504 ds/min), 182,034,194 states left on queue. +Progress(44) at 2024-11-07 04:10:07: 22,195,545,763 states generated (29,143,520 s/min), 1,740,349,901 distinct states found (1,776,286 ds/min), 181,869,339 states left on queue. +Progress(44) at 2024-11-07 04:11:07: 22,224,766,309 states generated (29,220,546 s/min), 1,742,110,577 distinct states found (1,760,676 ds/min), 181,671,885 states left on queue. +Progress(44) at 2024-11-07 04:12:07: 22,253,807,692 states generated (29,041,383 s/min), 1,743,796,752 distinct states found (1,686,175 ds/min), 181,407,584 states left on queue. +Progress(44) at 2024-11-07 04:13:07: 22,282,790,947 states generated (28,983,255 s/min), 1,745,617,175 distinct states found (1,820,423 ds/min), 181,265,096 states left on queue. +Progress(44) at 2024-11-07 04:14:07: 22,311,840,917 states generated (29,049,970 s/min), 1,747,424,658 distinct states found (1,807,483 ds/min), 181,110,335 states left on queue. +Progress(44) at 2024-11-07 04:15:07: 22,340,851,116 states generated (29,010,199 s/min), 1,749,204,899 distinct states found (1,780,241 ds/min), 180,933,264 states left on queue. +Progress(44) at 2024-11-07 04:16:07: 22,369,820,191 states generated (28,969,075 s/min), 1,751,058,290 distinct states found (1,853,391 ds/min), 180,819,450 states left on queue. +Progress(44) at 2024-11-07 04:17:07: 22,398,637,854 states generated (28,817,663 s/min), 1,752,838,012 distinct states found (1,779,722 ds/min), 180,641,066 states left on queue. +Progress(44) at 2024-11-07 04:18:07: 22,427,736,775 states generated (29,098,921 s/min), 1,754,678,716 distinct states found (1,840,704 ds/min), 180,523,907 states left on queue. +Progress(44) at 2024-11-07 04:19:07: 22,456,749,604 states generated (29,012,829 s/min), 1,756,653,204 distinct states found (1,974,488 ds/min), 180,502,441 states left on queue. +Progress(44) at 2024-11-07 04:20:07: 22,485,995,309 states generated (29,245,705 s/min), 1,758,406,219 distinct states found (1,753,015 ds/min), 180,303,710 states left on queue. +Progress(44) at 2024-11-07 04:21:07: 22,515,059,607 states generated (29,064,298 s/min), 1,760,239,858 distinct states found (1,833,639 ds/min), 180,203,277 states left on queue. +Progress(44) at 2024-11-07 04:22:07: 22,544,007,885 states generated (28,948,278 s/min), 1,761,871,023 distinct states found (1,631,165 ds/min), 179,919,396 states left on queue. +Progress(44) at 2024-11-07 04:23:07: 22,572,858,704 states generated (28,850,819 s/min), 1,763,420,170 distinct states found (1,549,147 ds/min), 179,579,696 states left on queue. +Progress(44) at 2024-11-07 04:24:07: 22,601,850,297 states generated (28,991,593 s/min), 1,765,118,103 distinct states found (1,697,933 ds/min), 179,386,571 states left on queue. +Progress(44) at 2024-11-07 04:25:07: 22,630,832,111 states generated (28,981,814 s/min), 1,766,934,802 distinct states found (1,816,699 ds/min), 179,271,264 states left on queue. 
+Progress(44) at 2024-11-07 04:26:07: 22,659,674,047 states generated (28,841,936 s/min), 1,768,697,425 distinct states found (1,762,623 ds/min), 179,093,059 states left on queue. +Progress(44) at 2024-11-07 04:27:07: 22,688,427,580 states generated (28,753,533 s/min), 1,770,450,184 distinct states found (1,752,759 ds/min), 178,899,489 states left on queue. +Progress(44) at 2024-11-07 04:28:07: 22,717,189,869 states generated (28,762,289 s/min), 1,772,256,239 distinct states found (1,806,055 ds/min), 178,731,640 states left on queue. +Progress(44) at 2024-11-07 04:29:07: 22,746,022,343 states generated (28,832,474 s/min), 1,774,044,050 distinct states found (1,787,811 ds/min), 178,570,129 states left on queue. +Progress(44) at 2024-11-07 04:30:07: 22,774,887,995 states generated (28,865,652 s/min), 1,775,840,059 distinct states found (1,796,009 ds/min), 178,368,886 states left on queue. +Checkpointing of run states/24-11-06-15-30-45.354 +Checkpointing completed at (2024-11-07 04:31:08) +Progress(44) at 2024-11-07 04:31:08: 22,803,877,345 states generated (28,989,350 s/min), 1,777,539,486 distinct states found (1,699,427 ds/min), 178,036,764 states left on queue. +Progress(44) at 2024-11-07 04:32:08: 22,832,344,161 states generated (28,466,816 s/min), 1,779,071,939 distinct states found (1,532,453 ds/min), 177,609,496 states left on queue. +Progress(44) at 2024-11-07 04:33:08: 22,860,965,708 states generated (28,621,547 s/min), 1,780,632,191 distinct states found (1,560,252 ds/min), 177,226,645 states left on queue. +Progress(44) at 2024-11-07 04:34:08: 22,890,116,212 states generated (29,150,504 s/min), 1,782,192,671 distinct states found (1,560,480 ds/min), 176,856,967 states left on queue. +Progress(44) at 2024-11-07 04:35:08: 22,919,394,798 states generated (29,278,586 s/min), 1,783,989,997 distinct states found (1,797,326 ds/min), 176,677,020 states left on queue. +Progress(44) at 2024-11-07 04:36:08: 22,948,717,272 states generated (29,322,474 s/min), 1,785,769,628 distinct states found (1,779,631 ds/min), 176,466,304 states left on queue. +Progress(44) at 2024-11-07 04:37:08: 22,978,008,874 states generated (29,291,602 s/min), 1,787,768,546 distinct states found (1,998,918 ds/min), 176,439,057 states left on queue. +Progress(45) at 2024-11-07 04:38:08: 23,007,259,342 states generated (29,250,468 s/min), 1,789,652,180 distinct states found (1,883,634 ds/min), 176,335,868 states left on queue. +Progress(45) at 2024-11-07 04:39:08: 23,036,414,234 states generated (29,154,892 s/min), 1,791,293,395 distinct states found (1,641,215 ds/min), 176,048,834 states left on queue. +Progress(45) at 2024-11-07 04:40:08: 23,065,467,218 states generated (29,052,984 s/min), 1,793,176,180 distinct states found (1,882,785 ds/min), 175,945,068 states left on queue. +Progress(45) at 2024-11-07 04:41:08: 23,094,601,413 states generated (29,134,195 s/min), 1,795,085,201 distinct states found (1,909,021 ds/min), 175,844,471 states left on queue. +Progress(45) at 2024-11-07 04:42:08: 23,123,835,299 states generated (29,233,886 s/min), 1,796,998,629 distinct states found (1,913,428 ds/min), 175,751,026 states left on queue. +Progress(45) at 2024-11-07 04:43:08: 23,153,014,383 states generated (29,179,084 s/min), 1,798,830,917 distinct states found (1,832,288 ds/min), 175,609,899 states left on queue. +Progress(45) at 2024-11-07 04:44:08: 23,181,848,791 states generated (28,834,408 s/min), 1,800,688,969 distinct states found (1,858,052 ds/min), 175,482,089 states left on queue. 
+Progress(45) at 2024-11-07 04:45:08: 23,210,960,242 states generated (29,111,451 s/min), 1,802,681,838 distinct states found (1,992,869 ds/min), 175,468,259 states left on queue. +Progress(45) at 2024-11-07 04:46:08: 23,239,931,898 states generated (28,971,656 s/min), 1,804,527,297 distinct states found (1,845,459 ds/min), 175,314,676 states left on queue. +Progress(45) at 2024-11-07 04:47:08: 23,269,110,236 states generated (29,178,338 s/min), 1,806,324,412 distinct states found (1,797,115 ds/min), 175,104,294 states left on queue. +Progress(45) at 2024-11-07 04:48:08: 23,298,261,893 states generated (29,151,657 s/min), 1,808,026,372 distinct states found (1,701,960 ds/min), 174,789,761 states left on queue. +Progress(45) at 2024-11-07 04:49:08: 23,327,194,301 states generated (28,932,408 s/min), 1,809,635,143 distinct states found (1,608,771 ds/min), 174,475,327 states left on queue. +Progress(45) at 2024-11-07 04:50:08: 23,356,033,807 states generated (28,839,506 s/min), 1,811,533,685 distinct states found (1,898,542 ds/min), 174,375,697 states left on queue. +Progress(45) at 2024-11-07 04:51:08: 23,384,783,950 states generated (28,750,143 s/min), 1,813,242,773 distinct states found (1,709,088 ds/min), 174,093,638 states left on queue. +Progress(45) at 2024-11-07 04:52:08: 23,413,868,078 states generated (29,084,128 s/min), 1,814,921,217 distinct states found (1,678,444 ds/min), 173,816,375 states left on queue. +Progress(45) at 2024-11-07 04:53:08: 23,443,072,326 states generated (29,204,248 s/min), 1,816,887,463 distinct states found (1,966,246 ds/min), 173,768,064 states left on queue. +Progress(45) at 2024-11-07 04:54:08: 23,472,531,302 states generated (29,458,976 s/min), 1,818,893,389 distinct states found (2,005,926 ds/min), 173,736,986 states left on queue. +Progress(45) at 2024-11-07 04:55:08: 23,501,670,169 states generated (29,138,867 s/min), 1,820,467,013 distinct states found (1,573,624 ds/min), 173,393,980 states left on queue. +Progress(45) at 2024-11-07 04:56:08: 23,530,619,816 states generated (28,949,647 s/min), 1,822,153,389 distinct states found (1,686,376 ds/min), 173,102,476 states left on queue. +Progress(45) at 2024-11-07 04:57:08: 23,559,730,839 states generated (29,111,023 s/min), 1,824,067,840 distinct states found (1,914,451 ds/min), 173,045,910 states left on queue. +Progress(45) at 2024-11-07 04:58:08: 23,588,956,543 states generated (29,225,704 s/min), 1,826,128,132 distinct states found (2,060,292 ds/min), 173,097,456 states left on queue. +Progress(45) at 2024-11-07 04:59:08: 23,617,943,385 states generated (28,986,842 s/min), 1,828,156,857 distinct states found (2,028,725 ds/min), 173,115,797 states left on queue. +Progress(45) at 2024-11-07 05:00:08: 23,647,052,247 states generated (29,108,862 s/min), 1,830,116,296 distinct states found (1,959,439 ds/min), 173,061,677 states left on queue. +Checkpointing of run states/24-11-06-15-30-45.354 +Checkpointing completed at (2024-11-07 05:01:09) +Progress(45) at 2024-11-07 05:01:09: 23,676,540,644 states generated (29,488,397 s/min), 1,832,081,172 distinct states found (1,964,876 ds/min), 173,019,523 states left on queue. +Progress(45) at 2024-11-07 05:02:09: 23,705,447,239 states generated (28,906,595 s/min), 1,834,157,962 distinct states found (2,076,790 ds/min), 173,069,444 states left on queue. +Progress(45) at 2024-11-07 05:03:09: 23,734,590,381 states generated (29,143,142 s/min), 1,836,148,599 distinct states found (1,990,637 ds/min), 173,037,041 states left on queue. 
+Progress(45) at 2024-11-07 05:04:09: 23,763,605,229 states generated (29,014,848 s/min), 1,838,302,051 distinct states found (2,153,452 ds/min), 173,135,339 states left on queue.
+Progress(45) at 2024-11-07 05:05:09: 23,792,794,847 states generated (29,189,618 s/min), 1,840,318,078 distinct states found (2,016,027 ds/min), 173,064,676 states left on queue.
+Progress(45) at 2024-11-07 05:06:09: 23,821,711,411 states generated (28,916,564 s/min), 1,842,248,819 distinct states found (1,930,741 ds/min), 172,938,116 states left on queue.
+Progress(45) at 2024-11-07 05:07:09: 23,850,829,522 states generated (29,118,111 s/min), 1,844,084,520 distinct states found (1,835,701 ds/min), 172,779,569 states left on queue.
+Progress(45) at 2024-11-07 05:08:09: 23,880,027,055 states generated (29,197,533 s/min), 1,846,207,907 distinct states found (2,123,387 ds/min), 172,876,875 states left on queue.
+Progress(45) at 2024-11-07 05:09:09: 23,909,238,654 states generated (29,211,599 s/min), 1,848,275,162 distinct states found (2,067,255 ds/min), 172,917,710 states left on queue.
+Progress(45) at 2024-11-07 05:10:09: 23,938,254,527 states generated (29,015,873 s/min), 1,850,062,508 distinct states found (1,787,346 ds/min), 172,709,939 states left on queue.
+Progress(45) at 2024-11-07 05:11:09: 23,967,280,908 states generated (29,026,381 s/min), 1,851,840,844 distinct states found (1,778,336 ds/min), 172,472,809 states left on queue.
+Progress(45) at 2024-11-07 05:12:09: 23,996,137,153 states generated (28,856,245 s/min), 1,853,907,711 distinct states found (2,066,867 ds/min), 172,514,422 states left on queue.
+Progress(45) at 2024-11-07 05:13:09: 24,025,003,271 states generated (28,866,118 s/min), 1,855,881,596 distinct states found (1,973,885 ds/min), 172,410,581 states left on queue.
+Progress(45) at 2024-11-07 05:14:09: 24,053,998,968 states generated (28,995,697 s/min), 1,857,730,142 distinct states found (1,848,546 ds/min), 172,259,468 states left on queue.
+Progress(45) at 2024-11-07 05:15:09: 24,082,780,775 states generated (28,781,807 s/min), 1,859,612,879 distinct states found (1,882,737 ds/min), 172,097,889 states left on queue.
+Progress(45) at 2024-11-07 05:16:09: 24,111,843,462 states generated (29,062,687 s/min), 1,861,479,353 distinct states found (1,866,474 ds/min), 171,938,834 states left on queue.
+Progress(45) at 2024-11-07 05:17:09: 24,140,987,153 states generated (29,143,691 s/min), 1,863,390,493 distinct states found (1,911,140 ds/min), 171,786,752 states left on queue.
+Progress(45) at 2024-11-07 05:18:09: 24,170,023,897 states generated (29,036,744 s/min), 1,864,965,603 distinct states found (1,575,110 ds/min), 171,386,848 states left on queue.
+Progress(45) at 2024-11-07 05:19:09: 24,198,987,772 states generated (28,963,875 s/min), 1,866,820,638 distinct states found (1,855,035 ds/min), 171,238,575 states left on queue.
+Progress(45) at 2024-11-07 05:20:09: 24,227,820,740 states generated (28,832,968 s/min), 1,868,623,853 distinct states found (1,803,215 ds/min), 171,005,974 states left on queue.
+Progress(45) at 2024-11-07 05:21:09: 24,256,712,636 states generated (28,891,896 s/min), 1,870,265,139 distinct states found (1,641,286 ds/min), 170,619,838 states left on queue.
+Progress(45) at 2024-11-07 05:22:09: 24,285,792,587 states generated (29,079,951 s/min), 1,871,770,548 distinct states found (1,505,409 ds/min), 170,247,019 states left on queue.
+Progress(45) at 2024-11-07 05:23:09: 24,315,021,618 states generated (29,229,031 s/min), 1,873,433,426 distinct states found (1,662,878 ds/min), 169,986,497 states left on queue.
+Progress(45) at 2024-11-07 05:24:09: 24,343,972,976 states generated (28,951,358 s/min), 1,874,958,509 distinct states found (1,525,083 ds/min), 169,639,357 states left on queue.
+Progress(45) at 2024-11-07 05:25:09: 24,372,818,044 states generated (28,845,068 s/min), 1,876,461,909 distinct states found (1,503,400 ds/min), 169,298,313 states left on queue.
+Progress(45) at 2024-11-07 05:26:09: 24,401,879,839 states generated (29,061,795 s/min), 1,878,043,093 distinct states found (1,581,184 ds/min), 169,034,999 states left on queue.
+Progress(45) at 2024-11-07 05:27:09: 24,431,117,440 states generated (29,237,601 s/min), 1,879,528,913 distinct states found (1,485,820 ds/min), 168,669,766 states left on queue.
+Progress(45) at 2024-11-07 05:28:09: 24,460,565,564 states generated (29,448,124 s/min), 1,881,382,841 distinct states found (1,853,928 ds/min), 168,585,549 states left on queue.
+Progress(45) at 2024-11-07 05:29:09: 24,489,842,320 states generated (29,276,756 s/min), 1,883,163,526 distinct states found (1,780,685 ds/min), 168,440,866 states left on queue.
+Progress(45) at 2024-11-07 05:30:09: 24,519,309,785 states generated (29,467,465 s/min), 1,884,840,978 distinct states found (1,677,452 ds/min), 168,176,100 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 05:31:09)
+Progress(45) at 2024-11-07 05:31:09: 24,548,699,426 states generated (29,389,641 s/min), 1,886,346,733 distinct states found (1,505,755 ds/min), 167,794,030 states left on queue.
+Progress(45) at 2024-11-07 05:32:09: 24,577,454,860 states generated (28,755,434 s/min), 1,887,761,288 distinct states found (1,414,555 ds/min), 167,342,409 states left on queue.
+Progress(45) at 2024-11-07 05:33:09: 24,606,401,929 states generated (28,947,069 s/min), 1,889,451,503 distinct states found (1,690,215 ds/min), 167,115,718 states left on queue.
+Progress(45) at 2024-11-07 05:34:09: 24,635,080,181 states generated (28,678,252 s/min), 1,891,013,080 distinct states found (1,561,577 ds/min), 166,760,395 states left on queue.
+Progress(45) at 2024-11-07 05:35:09: 24,663,912,233 states generated (28,832,052 s/min), 1,892,486,967 distinct states found (1,473,887 ds/min), 166,347,547 states left on queue.
+Progress(45) at 2024-11-07 05:36:09: 24,692,601,003 states generated (28,688,770 s/min), 1,894,014,661 distinct states found (1,527,694 ds/min), 165,980,327 states left on queue.
+Progress(45) at 2024-11-07 05:37:09: 24,721,596,280 states generated (28,995,277 s/min), 1,895,667,269 distinct states found (1,652,608 ds/min), 165,766,132 states left on queue.
+Progress(45) at 2024-11-07 05:38:09: 24,750,737,270 states generated (29,140,990 s/min), 1,897,304,588 distinct states found (1,637,319 ds/min), 165,602,331 states left on queue.
+Progress(45) at 2024-11-07 05:39:09: 24,779,762,621 states generated (29,025,351 s/min), 1,898,944,557 distinct states found (1,639,969 ds/min), 165,399,097 states left on queue.
+Progress(45) at 2024-11-07 05:40:09: 24,808,890,636 states generated (29,128,015 s/min), 1,901,039,200 distinct states found (2,094,643 ds/min), 165,505,866 states left on queue.
+Progress(45) at 2024-11-07 05:41:09: 24,837,834,330 states generated (28,943,694 s/min), 1,902,825,947 distinct states found (1,786,747 ds/min), 165,385,690 states left on queue.
+Progress(45) at 2024-11-07 05:42:09: 24,866,749,194 states generated (28,914,864 s/min), 1,904,509,048 distinct states found (1,683,101 ds/min), 165,143,394 states left on queue.
+Progress(45) at 2024-11-07 05:43:09: 24,895,891,462 states generated (29,142,268 s/min), 1,906,186,633 distinct states found (1,677,585 ds/min), 164,907,199 states left on queue.
+Progress(45) at 2024-11-07 05:44:09: 24,924,929,592 states generated (29,038,130 s/min), 1,907,774,010 distinct states found (1,587,377 ds/min), 164,567,256 states left on queue.
+Progress(45) at 2024-11-07 05:45:09: 24,953,854,731 states generated (28,925,139 s/min), 1,909,438,393 distinct states found (1,664,383 ds/min), 164,297,435 states left on queue.
+Progress(45) at 2024-11-07 05:46:09: 24,982,773,173 states generated (28,918,442 s/min), 1,911,115,370 distinct states found (1,676,977 ds/min), 164,029,981 states left on queue.
+Progress(45) at 2024-11-07 05:47:09: 25,011,681,639 states generated (28,908,466 s/min), 1,912,739,102 distinct states found (1,623,732 ds/min), 163,722,709 states left on queue.
+Progress(45) at 2024-11-07 05:48:09: 25,040,624,886 states generated (28,943,247 s/min), 1,914,465,220 distinct states found (1,726,118 ds/min), 163,504,979 states left on queue.
+Progress(45) at 2024-11-07 05:49:09: 25,069,369,631 states generated (28,744,745 s/min), 1,916,123,524 distinct states found (1,658,304 ds/min), 163,227,016 states left on queue.
+Progress(45) at 2024-11-07 05:50:09: 25,098,381,973 states generated (29,012,342 s/min), 1,917,856,454 distinct states found (1,732,930 ds/min), 163,020,213 states left on queue.
+Progress(45) at 2024-11-07 05:51:09: 25,127,432,010 states generated (29,050,037 s/min), 1,919,715,623 distinct states found (1,859,169 ds/min), 162,903,211 states left on queue.
+Progress(45) at 2024-11-07 05:52:09: 25,156,554,852 states generated (29,122,842 s/min), 1,921,381,482 distinct states found (1,665,859 ds/min), 162,640,342 states left on queue.
+Progress(45) at 2024-11-07 05:53:09: 25,185,439,752 states generated (28,884,900 s/min), 1,923,074,493 distinct states found (1,693,011 ds/min), 162,418,419 states left on queue.
+Progress(45) at 2024-11-07 05:54:09: 25,214,250,620 states generated (28,810,868 s/min), 1,924,599,166 distinct states found (1,524,673 ds/min), 162,035,736 states left on queue.
+Progress(45) at 2024-11-07 05:55:09: 25,243,065,684 states generated (28,815,064 s/min), 1,926,028,590 distinct states found (1,429,424 ds/min), 161,647,928 states left on queue.
+Progress(45) at 2024-11-07 05:56:09: 25,272,074,106 states generated (29,008,422 s/min), 1,927,788,924 distinct states found (1,760,334 ds/min), 161,469,066 states left on queue.
+Progress(45) at 2024-11-07 05:57:09: 25,300,916,527 states generated (28,842,421 s/min), 1,929,427,503 distinct states found (1,638,579 ds/min), 161,203,063 states left on queue.
+Progress(45) at 2024-11-07 05:58:09: 25,329,617,957 states generated (28,701,430 s/min), 1,931,016,200 distinct states found (1,588,697 ds/min), 160,883,828 states left on queue.
+Progress(45) at 2024-11-07 05:59:09: 25,358,305,874 states generated (28,687,917 s/min), 1,932,700,683 distinct states found (1,684,483 ds/min), 160,613,534 states left on queue.
+Progress(45) at 2024-11-07 06:00:09: 25,387,060,807 states generated (28,754,933 s/min), 1,934,352,908 distinct states found (1,652,225 ds/min), 160,340,594 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 06:01:10)
+Progress(45) at 2024-11-07 06:01:10: 25,416,167,383 states generated (29,106,576 s/min), 1,936,031,185 distinct states found (1,678,277 ds/min), 160,024,096 states left on queue.
+Progress(45) at 2024-11-07 06:02:10: 25,444,775,068 states generated (28,607,685 s/min), 1,937,531,864 distinct states found (1,500,679 ds/min), 159,558,759 states left on queue.
+Progress(45) at 2024-11-07 06:03:10: 25,473,218,014 states generated (28,442,946 s/min), 1,938,932,593 distinct states found (1,400,729 ds/min), 159,031,186 states left on queue.
+Progress(45) at 2024-11-07 06:04:10: 25,502,153,601 states generated (28,935,587 s/min), 1,940,366,906 distinct states found (1,434,313 ds/min), 158,550,067 states left on queue.
+Progress(45) at 2024-11-07 06:05:10: 25,531,409,924 states generated (29,256,323 s/min), 1,942,031,081 distinct states found (1,664,175 ds/min), 158,260,393 states left on queue.
+Progress(45) at 2024-11-07 06:06:10: 25,560,798,500 states generated (29,388,576 s/min), 1,943,755,697 distinct states found (1,724,616 ds/min), 158,001,838 states left on queue.
+Progress(45) at 2024-11-07 06:07:10: 25,590,101,236 states generated (29,302,736 s/min), 1,945,659,191 distinct states found (1,903,494 ds/min), 157,894,541 states left on queue.
+Progress(45) at 2024-11-07 06:08:10: 25,619,347,006 states generated (29,245,770 s/min), 1,947,436,584 distinct states found (1,777,393 ds/min), 157,703,839 states left on queue.
+Progress(45) at 2024-11-07 06:09:10: 25,648,466,795 states generated (29,119,789 s/min), 1,949,039,117 distinct states found (1,602,533 ds/min), 157,391,298 states left on queue.
+Progress(45) at 2024-11-07 06:10:10: 25,677,360,883 states generated (28,894,088 s/min), 1,950,787,656 distinct states found (1,748,539 ds/min), 157,176,854 states left on queue.
+Progress(45) at 2024-11-07 06:11:10: 25,706,625,655 states generated (29,264,772 s/min), 1,952,700,166 distinct states found (1,912,510 ds/min), 157,069,408 states left on queue.
+Progress(46) at 2024-11-07 06:12:10: 25,735,830,172 states generated (29,204,517 s/min), 1,954,444,069 distinct states found (1,743,903 ds/min), 156,852,227 states left on queue.
+Progress(46) at 2024-11-07 06:13:10: 25,764,811,792 states generated (28,981,620 s/min), 1,956,165,433 distinct states found (1,721,364 ds/min), 156,618,900 states left on queue.
+Progress(46) at 2024-11-07 06:14:10: 25,793,740,486 states generated (28,928,694 s/min), 1,957,961,862 distinct states found (1,796,429 ds/min), 156,441,787 states left on queue.
+Progress(46) at 2024-11-07 06:15:10: 25,822,741,831 states generated (29,001,345 s/min), 1,959,749,416 distinct states found (1,787,554 ds/min), 156,253,838 states left on queue.
+Progress(46) at 2024-11-07 06:16:10: 25,851,804,688 states generated (29,062,857 s/min), 1,961,466,422 distinct states found (1,717,006 ds/min), 155,977,351 states left on queue.
+Progress(46) at 2024-11-07 06:17:10: 25,880,868,584 states generated (29,063,896 s/min), 1,963,090,742 distinct states found (1,624,320 ds/min), 155,628,145 states left on queue.
+Progress(46) at 2024-11-07 06:18:10: 25,909,824,307 states generated (28,955,723 s/min), 1,964,570,100 distinct states found (1,479,358 ds/min), 155,182,107 states left on queue.
+Progress(46) at 2024-11-07 06:19:10: 25,938,584,425 states generated (28,760,118 s/min), 1,966,303,642 distinct states found (1,733,542 ds/min), 154,946,766 states left on queue.
+Progress(46) at 2024-11-07 06:20:10: 25,967,304,223 states generated (28,719,798 s/min), 1,967,883,207 distinct states found (1,579,565 ds/min), 154,558,935 states left on queue.
+Progress(46) at 2024-11-07 06:21:10: 25,996,402,469 states generated (29,098,246 s/min), 1,969,591,000 distinct states found (1,707,793 ds/min), 154,302,069 states left on queue.
+Progress(46) at 2024-11-07 06:22:10: 26,025,623,943 states generated (29,221,474 s/min), 1,971,434,403 distinct states found (1,843,403 ds/min), 154,157,059 states left on queue.
+Progress(46) at 2024-11-07 06:23:10: 26,055,038,054 states generated (29,414,111 s/min), 1,973,261,720 distinct states found (1,827,317 ds/min), 153,981,317 states left on queue.
+Progress(46) at 2024-11-07 06:24:10: 26,083,986,220 states generated (28,948,166 s/min), 1,974,670,648 distinct states found (1,408,928 ds/min), 153,508,388 states left on queue.
+Progress(46) at 2024-11-07 06:25:10: 26,113,067,907 states generated (29,081,687 s/min), 1,976,391,547 distinct states found (1,720,899 ds/min), 153,263,845 states left on queue.
+Progress(46) at 2024-11-07 06:26:10: 26,142,186,839 states generated (29,118,932 s/min), 1,978,379,881 distinct states found (1,988,334 ds/min), 153,253,200 states left on queue.
+Progress(46) at 2024-11-07 06:27:10: 26,171,338,068 states generated (29,151,229 s/min), 1,980,293,569 distinct states found (1,913,688 ds/min), 153,185,559 states left on queue.
+Progress(46) at 2024-11-07 06:28:10: 26,200,319,869 states generated (28,981,801 s/min), 1,982,130,034 distinct states found (1,836,465 ds/min), 153,039,826 states left on queue.
+Progress(46) at 2024-11-07 06:29:10: 26,229,451,237 states generated (29,131,368 s/min), 1,984,117,981 distinct states found (1,987,947 ds/min), 153,030,792 states left on queue.
+Progress(46) at 2024-11-07 06:30:10: 26,258,476,767 states generated (29,025,530 s/min), 1,985,981,073 distinct states found (1,863,092 ds/min), 152,917,939 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 06:31:11)
+Progress(46) at 2024-11-07 06:31:11: 26,287,657,848 states generated (29,181,081 s/min), 1,987,875,178 distinct states found (1,894,105 ds/min), 152,784,901 states left on queue.
+Progress(46) at 2024-11-07 06:32:11: 26,316,549,803 states generated (28,891,955 s/min), 1,989,821,141 distinct states found (1,945,963 ds/min), 152,728,813 states left on queue.
+Progress(46) at 2024-11-07 06:33:11: 26,345,570,902 states generated (29,021,099 s/min), 1,991,762,973 distinct states found (1,941,832 ds/min), 152,648,662 states left on queue.
+Progress(46) at 2024-11-07 06:34:11: 26,374,519,051 states generated (28,948,149 s/min), 1,993,605,958 distinct states found (1,842,985 ds/min), 152,446,201 states left on queue.
+Progress(46) at 2024-11-07 06:35:11: 26,403,403,284 states generated (28,884,233 s/min), 1,995,379,328 distinct states found (1,773,370 ds/min), 152,189,032 states left on queue.
+Progress(46) at 2024-11-07 06:36:11: 26,432,512,518 states generated (29,109,234 s/min), 1,997,205,848 distinct states found (1,826,520 ds/min), 152,060,823 states left on queue.
+Progress(46) at 2024-11-07 06:37:11: 26,461,635,963 states generated (29,123,445 s/min), 1,999,221,288 distinct states found (2,015,440 ds/min), 152,052,317 states left on queue.
+Progress(46) at 2024-11-07 06:38:11: 26,490,692,408 states generated (29,056,445 s/min), 2,001,003,940 distinct states found (1,782,652 ds/min), 151,869,333 states left on queue.
+Progress(46) at 2024-11-07 06:39:11: 26,519,611,691 states generated (28,919,283 s/min), 2,002,772,264 distinct states found (1,768,324 ds/min), 151,637,576 states left on queue.
+Progress(46) at 2024-11-07 06:40:11: 26,548,405,773 states generated (28,794,082 s/min), 2,004,530,832 distinct states found (1,758,568 ds/min), 151,415,653 states left on queue.
+Progress(46) at 2024-11-07 06:41:11: 26,577,168,173 states generated (28,762,400 s/min), 2,006,431,383 distinct states found (1,900,551 ds/min), 151,293,655 states left on queue.
+Progress(46) at 2024-11-07 06:42:11: 26,606,013,565 states generated (28,845,392 s/min), 2,008,118,930 distinct states found (1,687,547 ds/min), 150,979,607 states left on queue.
+Progress(46) at 2024-11-07 06:43:11: 26,634,840,454 states generated (28,826,889 s/min), 2,010,033,233 distinct states found (1,914,303 ds/min), 150,859,233 states left on queue.
+Progress(46) at 2024-11-07 06:44:11: 26,663,791,564 states generated (28,951,110 s/min), 2,011,764,506 distinct states found (1,731,273 ds/min), 150,592,176 states left on queue.
+Progress(46) at 2024-11-07 06:45:11: 26,692,845,560 states generated (29,053,996 s/min), 2,013,541,948 distinct states found (1,777,442 ds/min), 150,346,125 states left on queue.
+Progress(46) at 2024-11-07 06:46:11: 26,721,838,462 states generated (28,992,902 s/min), 2,015,055,311 distinct states found (1,513,363 ds/min), 149,898,025 states left on queue.
+Progress(46) at 2024-11-07 06:47:11: 26,750,784,724 states generated (28,946,262 s/min), 2,016,795,791 distinct states found (1,740,480 ds/min), 149,636,143 states left on queue.
+Progress(46) at 2024-11-07 06:48:11: 26,779,537,729 states generated (28,753,005 s/min), 2,018,420,817 distinct states found (1,625,026 ds/min), 149,264,338 states left on queue.
+Progress(46) at 2024-11-07 06:49:11: 26,808,414,064 states generated (28,876,335 s/min), 2,019,941,133 distinct states found (1,520,316 ds/min), 148,833,851 states left on queue.
+Progress(46) at 2024-11-07 06:50:11: 26,837,552,895 states generated (29,138,831 s/min), 2,021,402,334 distinct states found (1,461,201 ds/min), 148,423,082 states left on queue.
+Progress(46) at 2024-11-07 06:51:11: 26,866,488,521 states generated (28,935,626 s/min), 2,022,896,299 distinct states found (1,493,965 ds/min), 148,037,640 states left on queue.
+Progress(46) at 2024-11-07 06:52:11: 26,895,259,654 states generated (28,771,133 s/min), 2,024,306,180 distinct states found (1,409,881 ds/min), 147,623,626 states left on queue.
+Progress(46) at 2024-11-07 06:53:11: 26,924,324,639 states generated (29,064,985 s/min), 2,025,751,691 distinct states found (1,445,511 ds/min), 147,237,191 states left on queue.
+Progress(46) at 2024-11-07 06:54:11: 26,953,575,306 states generated (29,250,667 s/min), 2,027,292,041 distinct states found (1,540,350 ds/min), 146,929,253 states left on queue.
+Progress(46) at 2024-11-07 06:55:11: 26,982,863,734 states generated (29,288,428 s/min), 2,029,056,116 distinct states found (1,764,075 ds/min), 146,774,179 states left on queue.
+Progress(46) at 2024-11-07 06:56:11: 27,012,217,899 states generated (29,354,165 s/min), 2,030,705,091 distinct states found (1,648,975 ds/min), 146,523,776 states left on queue.
+Progress(46) at 2024-11-07 06:57:11: 27,041,431,406 states generated (29,213,507 s/min), 2,032,122,917 distinct states found (1,417,826 ds/min), 146,066,712 states left on queue.
+Progress(46) at 2024-11-07 06:58:11: 27,070,230,233 states generated (28,798,827 s/min), 2,033,502,867 distinct states found (1,379,950 ds/min), 145,580,465 states left on queue.
+Progress(46) at 2024-11-07 06:59:11: 27,099,119,410 states generated (28,889,177 s/min), 2,035,080,295 distinct states found (1,577,428 ds/min), 145,255,429 states left on queue.
+Progress(46) at 2024-11-07 07:00:11: 27,127,802,546 states generated (28,683,136 s/min), 2,036,480,069 distinct states found (1,399,774 ds/min), 144,763,326 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 07:01:11)
+Progress(46) at 2024-11-07 07:01:11: 27,156,729,000 states generated (28,926,454 s/min), 2,037,888,171 distinct states found (1,408,102 ds/min), 144,282,188 states left on queue.
+Progress(46) at 2024-11-07 07:02:11: 27,185,673,878 states generated (28,944,878 s/min), 2,039,404,499 distinct states found (1,516,328 ds/min), 143,933,899 states left on queue.
+Progress(46) at 2024-11-07 07:03:11: 27,214,800,380 states generated (29,126,502 s/min), 2,040,991,907 distinct states found (1,587,408 ds/min), 143,736,528 states left on queue.
+Progress(46) at 2024-11-07 07:04:11: 27,243,805,336 states generated (29,004,956 s/min), 2,042,560,493 distinct states found (1,568,586 ds/min), 143,474,607 states left on queue.
+Progress(46) at 2024-11-07 07:05:11: 27,272,912,902 states generated (29,107,566 s/min), 2,044,549,687 distinct states found (1,989,194 ds/min), 143,501,934 states left on queue.
+Progress(46) at 2024-11-07 07:06:11: 27,301,850,628 states generated (28,937,726 s/min), 2,046,213,816 distinct states found (1,664,129 ds/min), 143,280,971 states left on queue.
+Progress(46) at 2024-11-07 07:07:11: 27,330,744,799 states generated (28,894,171 s/min), 2,047,777,121 distinct states found (1,563,305 ds/min), 142,943,602 states left on queue.
+Progress(46) at 2024-11-07 07:08:11: 27,359,855,477 states generated (29,110,678 s/min), 2,049,356,015 distinct states found (1,578,894 ds/min), 142,631,188 states left on queue.
+Progress(46) at 2024-11-07 07:09:11: 27,388,745,464 states generated (28,889,987 s/min), 2,050,822,496 distinct states found (1,466,481 ds/min), 142,190,439 states left on queue.
+Progress(46) at 2024-11-07 07:10:11: 27,417,576,550 states generated (28,831,086 s/min), 2,052,379,523 distinct states found (1,557,027 ds/min), 141,821,153 states left on queue.
+Progress(46) at 2024-11-07 07:11:11: 27,446,546,405 states generated (28,969,855 s/min), 2,053,934,499 distinct states found (1,554,976 ds/min), 141,462,097 states left on queue.
+Progress(46) at 2024-11-07 07:12:11: 27,475,398,683 states generated (28,852,278 s/min), 2,055,510,649 distinct states found (1,576,150 ds/min), 141,116,110 states left on queue.
+Progress(46) at 2024-11-07 07:13:11: 27,504,113,194 states generated (28,714,511 s/min), 2,057,051,677 distinct states found (1,541,028 ds/min), 140,743,906 states left on queue.
+Progress(46) at 2024-11-07 07:14:11: 27,532,983,174 states generated (28,869,980 s/min), 2,058,669,649 distinct states found (1,617,972 ds/min), 140,436,853 states left on queue.
+Progress(46) at 2024-11-07 07:15:11: 27,562,088,285 states generated (29,105,111 s/min), 2,060,404,146 distinct states found (1,734,497 ds/min), 140,213,296 states left on queue.
+Progress(46) at 2024-11-07 07:16:11: 27,591,079,273 states generated (28,990,988 s/min), 2,061,979,907 distinct states found (1,575,761 ds/min), 139,895,056 states left on queue.
+Progress(46) at 2024-11-07 07:17:11: 27,619,876,413 states generated (28,797,140 s/min), 2,063,482,225 distinct states found (1,502,318 ds/min), 139,506,174 states left on queue.
+Progress(46) at 2024-11-07 07:18:11: 27,648,595,649 states generated (28,719,236 s/min), 2,064,847,355 distinct states found (1,365,130 ds/min), 139,035,783 states left on queue.
+Progress(46) at 2024-11-07 07:19:11: 27,677,544,192 states generated (28,948,543 s/min), 2,066,507,355 distinct states found (1,660,000 ds/min), 138,783,592 states left on queue.
+Progress(46) at 2024-11-07 07:20:11: 27,706,306,461 states generated (28,762,269 s/min), 2,068,019,192 distinct states found (1,511,837 ds/min), 138,418,256 states left on queue.
+Progress(46) at 2024-11-07 07:21:11: 27,734,873,733 states generated (28,567,272 s/min), 2,069,467,142 distinct states found (1,447,950 ds/min), 137,977,630 states left on queue.
+Progress(46) at 2024-11-07 07:22:11: 27,763,678,204 states generated (28,804,471 s/min), 2,071,034,824 distinct states found (1,567,682 ds/min), 137,622,296 states left on queue.
+Progress(46) at 2024-11-07 07:23:11: 27,792,322,332 states generated (28,644,128 s/min), 2,072,586,226 distinct states found (1,551,402 ds/min), 137,231,762 states left on queue.
+Progress(46) at 2024-11-07 07:24:11: 27,821,040,127 states generated (28,717,795 s/min), 2,074,030,831 distinct states found (1,444,605 ds/min), 136,731,600 states left on queue.
+Progress(46) at 2024-11-07 07:25:11: 27,849,404,654 states generated (28,364,527 s/min), 2,075,273,409 distinct states found (1,242,578 ds/min), 136,082,131 states left on queue.
+Progress(46) at 2024-11-07 07:26:11: 27,878,356,417 states generated (28,951,763 s/min), 2,076,656,601 distinct states found (1,383,192 ds/min), 135,570,796 states left on queue.
+Progress(46) at 2024-11-07 07:27:11: 27,907,776,802 states generated (29,420,385 s/min), 2,078,383,391 distinct states found (1,726,790 ds/min), 135,306,248 states left on queue.
+Progress(46) at 2024-11-07 07:28:11: 27,937,070,294 states generated (29,293,492 s/min), 2,080,076,828 distinct states found (1,693,437 ds/min), 135,034,380 states left on queue.
+Progress(46) at 2024-11-07 07:29:11: 27,966,287,907 states generated (29,217,613 s/min), 2,081,855,223 distinct states found (1,778,395 ds/min), 134,839,763 states left on queue.
+Progress(46) at 2024-11-07 07:30:11: 27,995,330,759 states generated (29,042,852 s/min), 2,083,372,197 distinct states found (1,516,974 ds/min), 134,461,641 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 07:31:12)
+Progress(46) at 2024-11-07 07:31:12: 28,024,387,579 states generated (29,056,820 s/min), 2,085,018,193 distinct states found (1,645,996 ds/min), 134,150,126 states left on queue.
+Progress(46) at 2024-11-07 07:32:12: 28,053,564,379 states generated (29,176,800 s/min), 2,086,850,158 distinct states found (1,831,965 ds/min), 133,983,084 states left on queue.
+Progress(46) at 2024-11-07 07:33:12: 28,082,556,747 states generated (28,992,368 s/min), 2,088,444,271 distinct states found (1,594,113 ds/min), 133,651,269 states left on queue.
+Progress(47) at 2024-11-07 07:34:12: 28,111,323,007 states generated (28,766,260 s/min), 2,090,072,790 distinct states found (1,628,519 ds/min), 133,350,640 states left on queue.
+Progress(47) at 2024-11-07 07:35:12: 28,140,191,163 states generated (28,868,156 s/min), 2,091,740,224 distinct states found (1,667,434 ds/min), 133,070,266 states left on queue.
+Progress(47) at 2024-11-07 07:36:12: 28,169,054,601 states generated (28,863,438 s/min), 2,093,375,975 distinct states found (1,635,751 ds/min), 132,752,319 states left on queue.
+Progress(47) at 2024-11-07 07:37:12: 28,197,994,162 states generated (28,939,561 s/min), 2,094,929,793 distinct states found (1,553,818 ds/min), 132,356,738 states left on queue.
+Progress(47) at 2024-11-07 07:38:12: 28,226,808,491 states generated (28,814,329 s/min), 2,096,311,441 distinct states found (1,381,648 ds/min), 131,832,292 states left on queue.
+Progress(47) at 2024-11-07 07:39:12: 28,255,451,016 states generated (28,642,525 s/min), 2,097,907,185 distinct states found (1,595,744 ds/min), 131,487,862 states left on queue.
+Progress(47) at 2024-11-07 07:40:12: 28,284,015,286 states generated (28,564,270 s/min), 2,099,332,452 distinct states found (1,425,267 ds/min), 130,982,897 states left on queue.
+Progress(47) at 2024-11-07 07:41:12: 28,313,051,806 states generated (29,036,520 s/min), 2,101,053,792 distinct states found (1,721,340 ds/min), 130,744,522 states left on queue.
+Progress(47) at 2024-11-07 07:42:12: 28,342,348,160 states generated (29,296,354 s/min), 2,102,778,970 distinct states found (1,725,178 ds/min), 130,505,777 states left on queue.
+Progress(47) at 2024-11-07 07:43:12: 28,371,533,935 states generated (29,185,775 s/min), 2,104,337,778 distinct states found (1,558,808 ds/min), 130,144,304 states left on queue.
+Progress(47) at 2024-11-07 07:44:12: 28,400,351,066 states generated (28,817,131 s/min), 2,105,835,284 distinct states found (1,497,506 ds/min), 129,719,871 states left on queue.
+Progress(47) at 2024-11-07 07:45:12: 28,429,411,463 states generated (29,060,397 s/min), 2,107,704,752 distinct states found (1,869,468 ds/min), 129,618,749 states left on queue.
+Progress(47) at 2024-11-07 07:46:12: 28,458,488,093 states generated (29,076,630 s/min), 2,109,483,825 distinct states found (1,779,073 ds/min), 129,439,723 states left on queue.
+Progress(47) at 2024-11-07 07:47:12: 28,487,338,391 states generated (28,850,298 s/min), 2,111,230,358 distinct states found (1,746,533 ds/min), 129,232,124 states left on queue.
+Progress(47) at 2024-11-07 07:48:12: 28,516,411,931 states generated (29,073,540 s/min), 2,113,150,785 distinct states found (1,920,427 ds/min), 129,168,385 states left on queue.
+Progress(47) at 2024-11-07 07:49:12: 28,545,299,037 states generated (28,887,106 s/min), 2,114,878,071 distinct states found (1,727,286 ds/min), 128,948,735 states left on queue.
+Progress(47) at 2024-11-07 07:50:12: 28,574,186,091 states generated (28,887,054 s/min), 2,116,622,746 distinct states found (1,744,675 ds/min), 128,711,386 states left on queue.
+Progress(47) at 2024-11-07 07:51:12: 28,603,057,442 states generated (28,871,351 s/min), 2,118,435,710 distinct states found (1,812,964 ds/min), 128,543,573 states left on queue.
+Progress(47) at 2024-11-07 07:52:12: 28,632,042,720 states generated (28,985,278 s/min), 2,120,240,818 distinct states found (1,805,108 ds/min), 128,349,742 states left on queue.
+Progress(47) at 2024-11-07 07:53:12: 28,660,885,097 states generated (28,842,377 s/min), 2,121,904,885 distinct states found (1,664,067 ds/min), 128,002,987 states left on queue.
+Progress(47) at 2024-11-07 07:54:12: 28,689,690,902 states generated (28,805,805 s/min), 2,123,498,767 distinct states found (1,593,882 ds/min), 127,622,035 states left on queue.
+Progress(47) at 2024-11-07 07:55:12: 28,718,827,206 states generated (29,136,304 s/min), 2,125,375,087 distinct states found (1,876,320 ds/min), 127,518,682 states left on queue.
+Progress(47) at 2024-11-07 07:56:12: 28,747,988,287 states generated (29,161,081 s/min), 2,127,234,055 distinct states found (1,858,968 ds/min), 127,390,123 states left on queue.
+Progress(47) at 2024-11-07 07:57:12: 28,776,918,449 states generated (28,930,162 s/min), 2,128,896,639 distinct states found (1,662,584 ds/min), 127,099,202 states left on queue.
+Progress(47) at 2024-11-07 07:58:12: 28,805,826,521 states generated (28,908,072 s/min), 2,130,485,896 distinct states found (1,589,257 ds/min), 126,731,846 states left on queue.
+Progress(47) at 2024-11-07 07:59:12: 28,834,550,061 states generated (28,723,540 s/min), 2,132,267,049 distinct states found (1,781,153 ds/min), 126,524,859 states left on queue.
+Progress(47) at 2024-11-07 08:00:12: 28,863,218,037 states generated (28,667,976 s/min), 2,133,901,471 distinct states found (1,634,422 ds/min), 126,149,810 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 08:01:13)
+Progress(47) at 2024-11-07 08:01:13: 28,892,405,277 states generated (29,187,240 s/min), 2,135,683,266 distinct states found (1,781,795 ds/min), 125,938,046 states left on queue.
+Progress(47) at 2024-11-07 08:02:13: 28,921,188,007 states generated (28,782,730 s/min), 2,137,299,589 distinct states found (1,616,323 ds/min), 125,575,223 states left on queue.
+Progress(47) at 2024-11-07 08:03:13: 28,950,198,581 states generated (29,010,574 s/min), 2,138,945,715 distinct states found (1,646,126 ds/min), 125,225,825 states left on queue.
+Progress(47) at 2024-11-07 08:04:13: 28,979,052,322 states generated (28,853,741 s/min), 2,140,384,312 distinct states found (1,438,597 ds/min), 124,739,890 states left on queue.
+Progress(47) at 2024-11-07 08:05:13: 29,007,862,556 states generated (28,810,234 s/min), 2,142,020,690 distinct states found (1,636,378 ds/min), 124,389,570 states left on queue.
+Progress(47) at 2024-11-07 08:06:13: 29,036,639,997 states generated (28,777,441 s/min), 2,143,436,769 distinct states found (1,416,079 ds/min), 123,853,456 states left on queue.
+Progress(47) at 2024-11-07 08:07:13: 29,065,681,489 states generated (29,041,492 s/min), 2,144,841,718 distinct states found (1,404,949 ds/min), 123,385,042 states left on queue.
+Progress(47) at 2024-11-07 08:08:13: 29,094,462,032 states generated (28,780,543 s/min), 2,146,214,867 distinct states found (1,373,149 ds/min), 122,908,921 states left on queue.
+Progress(47) at 2024-11-07 08:09:13: 29,123,289,758 states generated (28,827,726 s/min), 2,147,553,984 distinct states found (1,339,117 ds/min), 122,446,193 states left on queue.
+Progress(47) at 2024-11-07 08:10:13: 29,152,503,386 states generated (29,213,628 s/min), 2,148,942,911 distinct states found (1,388,927 ds/min), 122,030,640 states left on queue.
+Progress(47) at 2024-11-07 08:11:13: 29,181,728,737 states generated (29,225,351 s/min), 2,150,631,619 distinct states found (1,688,708 ds/min), 121,816,919 states left on queue.
+Progress(47) at 2024-11-07 08:12:13: 29,211,003,478 states generated (29,274,741 s/min), 2,152,175,254 distinct states found (1,543,635 ds/min), 121,489,774 states left on queue.
+Progress(47) at 2024-11-07 08:13:13: 29,240,102,268 states generated (29,098,790 s/min), 2,153,537,952 distinct states found (1,362,698 ds/min), 120,992,206 states left on queue.
+Progress(47) at 2024-11-07 08:14:13: 29,268,843,458 states generated (28,741,190 s/min), 2,154,896,522 distinct states found (1,358,570 ds/min), 120,481,830 states left on queue.
+Progress(47) at 2024-11-07 08:15:13: 29,297,458,982 states generated (28,615,524 s/min), 2,156,228,693 distinct states found (1,332,171 ds/min), 119,935,590 states left on queue.
+Progress(47) at 2024-11-07 08:16:13: 29,326,133,934 states generated (28,674,952 s/min), 2,157,558,222 distinct states found (1,329,529 ds/min), 119,402,611 states left on queue.
+Progress(47) at 2024-11-07 08:17:13: 29,355,133,179 states generated (28,999,245 s/min), 2,159,036,229 distinct states found (1,478,007 ds/min), 119,059,305 states left on queue.
+Progress(47) at 2024-11-07 08:18:13: 29,384,094,216 states generated (28,961,037 s/min), 2,160,401,726 distinct states found (1,365,497 ds/min), 118,659,528 states left on queue.
+Progress(47) at 2024-11-07 08:19:13: 29,413,210,497 states generated (29,116,281 s/min), 2,162,252,062 distinct states found (1,850,336 ds/min), 118,605,990 states left on queue.
+Progress(47) at 2024-11-07 08:20:13: 29,442,123,726 states generated (28,913,229 s/min), 2,163,968,572 distinct states found (1,716,510 ds/min), 118,430,828 states left on queue.
+Progress(47) at 2024-11-07 08:21:13: 29,470,933,813 states generated (28,810,087 s/min), 2,165,411,802 distinct states found (1,443,230 ds/min), 118,017,068 states left on queue.
+Progress(47) at 2024-11-07 08:22:13: 29,499,968,878 states generated (29,035,065 s/min), 2,166,884,069 distinct states found (1,472,267 ds/min), 117,620,342 states left on queue.
+Progress(47) at 2024-11-07 08:23:13: 29,528,752,811 states generated (28,783,933 s/min), 2,168,252,577 distinct states found (1,368,508 ds/min), 117,101,560 states left on queue.
+Progress(47) at 2024-11-07 08:24:13: 29,557,568,598 states generated (28,815,787 s/min), 2,169,705,158 distinct states found (1,452,581 ds/min), 116,651,662 states left on queue.
+Progress(47) at 2024-11-07 08:25:13: 29,586,373,945 states generated (28,805,347 s/min), 2,171,138,563 distinct states found (1,433,405 ds/min), 116,184,414 states left on queue.
+Progress(47) at 2024-11-07 08:26:13: 29,614,983,668 states generated (28,609,723 s/min), 2,172,585,802 distinct states found (1,447,239 ds/min), 115,737,683 states left on queue.
+Progress(47) at 2024-11-07 08:27:13: 29,643,800,320 states generated (28,816,652 s/min), 2,174,078,381 distinct states found (1,492,579 ds/min), 115,326,021 states left on queue.
+Progress(47) at 2024-11-07 08:28:13: 29,672,907,645 states generated (29,107,325 s/min), 2,175,677,520 distinct states found (1,599,139 ds/min), 114,997,885 states left on queue.
+Progress(47) at 2024-11-07 08:29:13: 29,701,705,556 states generated (28,797,911 s/min), 2,177,190,856 distinct states found (1,513,336 ds/min), 114,628,087 states left on queue.
+Progress(47) at 2024-11-07 08:30:13: 29,730,412,995 states generated (28,707,439 s/min), 2,178,512,609 distinct states found (1,321,753 ds/min), 114,087,841 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 08:31:13)
+Progress(47) at 2024-11-07 08:31:13: 29,759,387,383 states generated (28,974,388 s/min), 2,180,040,685 distinct states found (1,528,076 ds/min), 113,747,306 states left on queue.
+Progress(47) at 2024-11-07 08:32:13: 29,788,001,065 states generated (28,613,682 s/min), 2,181,419,698 distinct states found (1,379,013 ds/min), 113,276,454 states left on queue.
+Progress(47) at 2024-11-07 08:33:13: 29,816,483,253 states generated (28,482,188 s/min), 2,182,794,509 distinct states found (1,374,811 ds/min), 112,764,181 states left on queue.
+Progress(47) at 2024-11-07 08:34:13: 29,845,032,133 states generated (28,548,880 s/min), 2,184,180,577 distinct states found (1,386,068 ds/min), 112,275,854 states left on queue.
+Progress(47) at 2024-11-07 08:35:13: 29,873,704,121 states generated (28,671,988 s/min), 2,185,610,616 distinct states found (1,430,039 ds/min), 111,765,886 states left on queue.
+Progress(47) at 2024-11-07 08:36:13: 29,901,983,007 states generated (28,278,886 s/min), 2,186,742,865 distinct states found (1,132,249 ds/min), 111,037,502 states left on queue.
+Progress(47) at 2024-11-07 08:37:13: 29,931,128,222 states generated (29,145,215 s/min), 2,188,247,053 distinct states found (1,504,188 ds/min), 110,610,871 states left on queue.
+Progress(47) at 2024-11-07 08:38:13: 29,960,291,600 states generated (29,163,378 s/min), 2,189,791,380 distinct states found (1,544,327 ds/min), 110,219,347 states left on queue.
+Progress(47) at 2024-11-07 08:39:13: 29,989,426,093 states generated (29,134,493 s/min), 2,191,523,686 distinct states found (1,732,306 ds/min), 109,988,090 states left on queue.
+Progress(47) at 2024-11-07 08:40:13: 30,018,419,613 states generated (28,993,520 s/min), 2,192,983,724 distinct states found (1,460,038 ds/min), 109,567,153 states left on queue.
+Progress(47) at 2024-11-07 08:41:13: 30,047,169,261 states generated (28,749,648 s/min), 2,194,485,325 distinct states found (1,501,601 ds/min), 109,159,610 states left on queue.
+Progress(47) at 2024-11-07 08:42:13: 30,076,320,011 states generated (29,150,750 s/min), 2,196,261,852 distinct states found (1,776,527 ds/min), 108,952,775 states left on queue.
+Progress(47) at 2024-11-07 08:43:13: 30,105,246,939 states generated (28,926,928 s/min), 2,197,745,917 distinct states found (1,484,065 ds/min), 108,533,801 states left on queue.
+Progress(47) at 2024-11-07 08:44:13: 30,134,017,722 states generated (28,770,783 s/min), 2,199,274,846 distinct states found (1,528,929 ds/min), 108,138,210 states left on queue.
+Progress(48) at 2024-11-07 08:45:13: 30,162,850,009 states generated (28,832,287 s/min), 2,200,818,695 distinct states found (1,543,849 ds/min), 107,749,686 states left on queue.
+Progress(48) at 2024-11-07 08:46:13: 30,191,763,541 states generated (28,913,532 s/min), 2,202,269,881 distinct states found (1,451,186 ds/min), 107,274,074 states left on queue.
+Progress(48) at 2024-11-07 08:47:13: 30,220,450,821 states generated (28,687,280 s/min), 2,203,579,369 distinct states found (1,309,488 ds/min), 106,693,506 states left on queue.
+Progress(48) at 2024-11-07 08:48:13: 30,249,109,647 states generated (28,658,826 s/min), 2,204,980,828 distinct states found (1,401,459 ds/min), 106,171,815 states left on queue.
+Progress(48) at 2024-11-07 08:49:13: 30,278,004,502 states generated (28,894,855 s/min), 2,206,546,641 distinct states found (1,565,813 ds/min), 105,800,017 states left on queue.
+Progress(48) at 2024-11-07 08:50:13: 30,307,176,628 states generated (29,172,126 s/min), 2,208,173,395 distinct states found (1,626,754 ds/min), 105,492,735 states left on queue.
+Progress(48) at 2024-11-07 08:51:13: 30,336,267,563 states generated (29,090,935 s/min), 2,209,629,083 distinct states found (1,455,688 ds/min), 105,046,561 states left on queue.
+Progress(48) at 2024-11-07 08:52:13: 30,365,237,300 states generated (28,969,737 s/min), 2,211,221,126 distinct states found (1,592,043 ds/min), 104,707,696 states left on queue.
+Progress(48) at 2024-11-07 08:53:13: 30,394,270,909 states generated (29,033,609 s/min), 2,212,948,437 distinct states found (1,727,311 ds/min), 104,490,104 states left on queue.
+Progress(48) at 2024-11-07 08:54:13: 30,423,140,115 states generated (28,869,206 s/min), 2,214,640,116 distinct states found (1,691,679 ds/min), 104,243,061 states left on queue.
+Progress(48) at 2024-11-07 08:55:13: 30,452,062,605 states generated (28,922,490 s/min), 2,216,327,939 distinct states found (1,687,823 ds/min), 103,983,745 states left on queue.
+Progress(48) at 2024-11-07 08:56:13: 30,481,071,056 states generated (29,008,451 s/min), 2,217,983,905 distinct states found (1,655,966 ds/min), 103,702,586 states left on queue.
+Progress(48) at 2024-11-07 08:57:13: 30,509,808,031 states generated (28,736,975 s/min), 2,219,662,593 distinct states found (1,678,688 ds/min), 103,423,522 states left on queue.
+Progress(48) at 2024-11-07 08:58:13: 30,538,616,862 states generated (28,808,831 s/min), 2,221,288,821 distinct states found (1,626,228 ds/min), 103,098,334 states left on queue.
+Progress(48) at 2024-11-07 08:59:13: 30,567,539,949 states generated (28,923,087 s/min), 2,222,969,669 distinct states found (1,680,848 ds/min), 102,811,145 states left on queue.
+Progress(48) at 2024-11-07 09:00:13: 30,596,220,572 states generated (28,680,623 s/min), 2,224,451,086 distinct states found (1,481,417 ds/min), 102,320,643 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 09:01:14)
+Progress(48) at 2024-11-07 09:01:14: 30,625,254,005 states generated (29,033,433 s/min), 2,225,971,213 distinct states found (1,520,127 ds/min), 101,895,678 states left on queue.
+Progress(48) at 2024-11-07 09:02:14: 30,654,316,875 states generated (29,062,870 s/min), 2,227,776,007 distinct states found (1,804,794 ds/min), 101,720,925 states left on queue.
+Progress(48) at 2024-11-07 09:03:14: 30,683,368,837 states generated (29,051,962 s/min), 2,229,520,592 distinct states found (1,744,585 ds/min), 101,516,049 states left on queue.
+Progress(48) at 2024-11-07 09:04:14: 30,712,221,770 states generated (28,852,933 s/min), 2,231,006,576 distinct states found (1,485,984 ds/min), 101,059,951 states left on queue.
+Progress(48) at 2024-11-07 09:05:14: 30,740,916,958 states generated (28,695,188 s/min), 2,232,634,565 distinct states found (1,627,989 ds/min), 100,742,863 states left on queue.
+Progress(48) at 2024-11-07 09:06:14: 30,769,477,527 states generated (28,560,569 s/min), 2,234,099,495 distinct states found (1,464,930 ds/min), 100,237,731 states left on queue.
+Progress(48) at 2024-11-07 09:07:14: 30,798,306,365 states generated (28,828,838 s/min), 2,235,757,510 distinct states found (1,658,015 ds/min), 99,936,798 states left on queue.
+Progress(48) at 2024-11-07 09:08:14: 30,827,145,014 states generated (28,838,649 s/min), 2,237,323,374 distinct states found (1,565,864 ds/min), 99,542,928 states left on queue.
+Progress(48) at 2024-11-07 09:09:14: 30,855,967,384 states generated (28,822,370 s/min), 2,238,712,445 distinct states found (1,389,071 ds/min), 98,994,892 states left on queue.
+Progress(48) at 2024-11-07 09:10:14: 30,884,757,904 states generated (28,790,520 s/min), 2,240,211,537 distinct states found (1,499,092 ds/min), 98,555,003 states left on queue.
+Progress(48) at 2024-11-07 09:11:14: 30,913,436,301 states generated (28,678,397 s/min), 2,241,549,402 distinct states found (1,337,865 ds/min), 97,972,368 states left on queue.
+Progress(48) at 2024-11-07 09:12:14: 30,942,398,628 states generated (28,962,327 s/min), 2,242,894,478 distinct states found (1,345,076 ds/min), 97,450,191 states left on queue.
+Progress(48) at 2024-11-07 09:13:14: 30,971,150,912 states generated (28,752,284 s/min), 2,244,149,533 distinct states found (1,255,055 ds/min), 96,915,440 states left on queue.
+Progress(48) at 2024-11-07 09:14:14: 31,000,226,695 states generated (29,075,783 s/min), 2,245,486,253 distinct states found (1,336,720 ds/min), 96,453,711 states left on queue.
+Progress(48) at 2024-11-07 09:15:14: 31,029,410,660 states generated (29,183,965 s/min), 2,247,033,348 distinct states found (1,547,095 ds/min), 96,134,910 states left on queue.
+Progress(48) at 2024-11-07 09:16:14: 31,058,657,395 states generated (29,246,735 s/min), 2,248,447,081 distinct states found (1,413,733 ds/min), 95,701,875 states left on queue.
+Progress(48) at 2024-11-07 09:17:14: 31,087,368,874 states generated (28,711,479 s/min), 2,249,703,997 distinct states found (1,256,916 ds/min), 95,112,797 states left on queue.
+Progress(48) at 2024-11-07 09:18:14: 31,115,905,907 states generated (28,537,033 s/min), 2,250,949,093 distinct states found (1,245,096 ds/min), 94,499,889 states left on queue.
+Progress(48) at 2024-11-07 09:19:14: 31,144,578,992 states generated (28,673,085 s/min), 2,252,226,995 distinct states found (1,277,902 ds/min), 93,927,098 states left on queue.
+Progress(48) at 2024-11-07 09:20:14: 31,173,557,966 states generated (28,978,974 s/min), 2,253,602,196 distinct states found (1,375,201 ds/min), 93,561,559 states left on queue.
+Progress(48) at 2024-11-07 09:21:14: 31,202,521,307 states generated (28,963,341 s/min), 2,255,224,149 distinct states found (1,621,953 ds/min), 93,337,000 states left on queue.
+Progress(48) at 2024-11-07 09:22:14: 31,231,451,884 states generated (28,930,577 s/min), 2,256,879,564 distinct states found (1,655,415 ds/min), 93,119,996 states left on queue.
+Progress(48) at 2024-11-07 09:23:14: 31,260,174,245 states generated (28,722,361 s/min), 2,258,206,514 distinct states found (1,326,950 ds/min), 92,610,216 states left on queue.
+Progress(48) at 2024-11-07 09:24:14: 31,289,091,475 states generated (28,917,230 s/min), 2,259,564,810 distinct states found (1,358,296 ds/min), 92,123,452 states left on queue.
+Progress(48) at 2024-11-07 09:25:14: 31,317,753,943 states generated (28,662,468 s/min), 2,260,868,559 distinct states found (1,303,749 ds/min), 91,550,997 states left on queue.
+Progress(48) at 2024-11-07 09:26:14: 31,346,435,672 states generated (28,681,729 s/min), 2,262,197,433 distinct states found (1,328,874 ds/min), 91,002,731 states left on queue.
+Progress(48) at 2024-11-07 09:27:14: 31,375,074,275 states generated (28,638,603 s/min), 2,263,549,308 distinct states found (1,351,875 ds/min), 90,479,028 states left on queue.
+Progress(48) at 2024-11-07 09:28:14: 31,403,896,903 states generated (28,822,628 s/min), 2,264,999,048 distinct states found (1,449,740 ds/min), 90,030,284 states left on queue.
+Progress(48) at 2024-11-07 09:29:14: 31,432,772,052 states generated (28,875,149 s/min), 2,266,431,878 distinct states found (1,432,830 ds/min), 89,580,165 states left on queue.
+Progress(48) at 2024-11-07 09:30:14: 31,461,382,905 states generated (28,610,853 s/min), 2,267,701,315 distinct states found (1,269,437 ds/min), 89,008,135 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 09:31:15)
+Progress(48) at 2024-11-07 09:31:15: 31,490,350,002 states generated (28,967,097 s/min), 2,269,120,991 distinct states found (1,419,676 ds/min), 88,574,899 states left on queue.
+Progress(48) at 2024-11-07 09:32:15: 31,518,738,286 states generated (28,388,284 s/min), 2,270,333,667 distinct states found (1,212,676 ds/min), 87,950,800 states left on queue.
+Progress(48) at 2024-11-07 09:33:15: 31,547,227,429 states generated (28,489,143 s/min), 2,271,632,491 distinct states found (1,298,824 ds/min), 87,379,110 states left on queue.
+Progress(48) at 2024-11-07 09:34:15: 31,575,696,846 states generated (28,469,417 s/min), 2,272,873,166 distinct states found (1,240,675 ds/min), 86,717,955 states left on queue.
+Progress(48) at 2024-11-07 09:35:15: 31,604,509,248 states generated (28,812,402 s/min), 2,274,166,128 distinct states found (1,292,962 ds/min), 86,122,414 states left on queue.
+Progress(48) at 2024-11-07 09:36:15: 31,633,623,894 states generated (29,114,646 s/min), 2,275,690,739 distinct states found (1,524,611 ds/min), 85,718,820 states left on queue.
+Progress(48) at 2024-11-07 09:37:15: 31,662,734,164 states generated (29,110,270 s/min), 2,277,282,041 distinct states found (1,591,302 ds/min), 85,389,121 states left on queue.
+Progress(48) at 2024-11-07 09:38:15: 31,691,488,753 states generated (28,754,589 s/min), 2,278,666,982 distinct states found (1,384,941 ds/min), 84,903,119 states left on queue.
+Progress(48) at 2024-11-07 09:39:15: 31,720,428,706 states generated (28,939,953 s/min), 2,280,231,311 distinct states found (1,564,329 ds/min), 84,529,794 states left on queue.
+Progress(48) at 2024-11-07 09:40:15: 31,749,336,886 states generated (28,908,180 s/min), 2,281,688,218 distinct states found (1,456,907 ds/min), 84,091,511 states left on queue.
+Progress(48) at 2024-11-07 09:41:15: 31,778,054,342 states generated (28,717,456 s/min), 2,283,102,693 distinct states found (1,414,475 ds/min), 83,605,316 states left on queue.
+Progress(49) at 2024-11-07 09:42:15: 31,806,874,604 states generated (28,820,262 s/min), 2,284,525,902 distinct states found (1,423,209 ds/min), 83,115,134 states left on queue.
+Progress(49) at 2024-11-07 09:43:15: 31,835,557,645 states generated (28,683,041 s/min), 2,285,776,893 distinct states found (1,250,991 ds/min), 82,491,419 states left on queue.
+Progress(49) at 2024-11-07 09:44:15: 31,864,075,450 states generated (28,517,805 s/min), 2,287,028,991 distinct states found (1,252,098 ds/min), 81,847,819 states left on queue.
+Progress(49) at 2024-11-07 09:45:15: 31,892,999,186 states generated (28,923,736 s/min), 2,288,552,140 distinct states found (1,523,149 ds/min), 81,459,937 states left on queue.
+Progress(49) at 2024-11-07 09:46:15: 31,922,276,996 states generated (29,277,810 s/min), 2,290,137,668 distinct states found (1,585,528 ds/min), 81,115,285 states left on queue.
+Progress(49) at 2024-11-07 09:47:15: 31,951,109,751 states generated (28,832,755 s/min), 2,291,477,001 distinct states found (1,339,333 ds/min), 80,582,606 states left on queue.
+Progress(49) at 2024-11-07 09:48:15: 31,980,103,122 states generated (28,993,371 s/min), 2,293,149,633 distinct states found (1,672,632 ds/min), 80,321,900 states left on queue.
+Progress(49) at 2024-11-07 09:49:15: 32,008,927,227 states generated (28,824,105 s/min), 2,294,737,299 distinct states found (1,587,666 ds/min), 79,988,982 states left on queue.
+Progress(49) at 2024-11-07 09:50:15: 32,037,912,405 states generated (28,985,178 s/min), 2,296,369,269 distinct states found (1,631,970 ds/min), 79,688,340 states left on queue.
+Progress(49) at 2024-11-07 09:51:15: 32,066,650,871 states generated (28,738,466 s/min), 2,297,881,682 distinct states found (1,512,413 ds/min), 79,285,058 states left on queue.
+Progress(49) at 2024-11-07 09:52:15: 32,095,474,869 states generated (28,823,998 s/min), 2,299,386,856 distinct states found (1,505,174 ds/min), 78,860,285 states left on queue.
+Progress(49) at 2024-11-07 09:53:15: 32,124,254,306 states generated (28,779,437 s/min), 2,300,974,245 distinct states found (1,587,389 ds/min), 78,501,509 states left on queue.
+Progress(49) at 2024-11-07 09:54:15: 32,152,874,934 states generated (28,620,628 s/min), 2,302,313,494 distinct states found (1,339,249 ds/min), 77,908,264 states left on queue.
+Progress(49) at 2024-11-07 09:55:15: 32,181,625,656 states generated (28,750,722 s/min), 2,303,719,911 distinct states found (1,406,417 ds/min), 77,409,147 states left on queue.
+Progress(49) at 2024-11-07 09:56:15: 32,210,690,682 states generated (29,065,026 s/min), 2,305,458,559 distinct states found (1,738,648 ds/min), 77,178,015 states left on queue.
+Progress(49) at 2024-11-07 09:57:15: 32,239,586,160 states generated (28,895,478 s/min), 2,307,003,156 distinct states found (1,544,597 ds/min), 76,805,818 states left on queue.
+Progress(49) at 2024-11-07 09:58:15: 32,268,327,819 states generated (28,741,659 s/min), 2,308,436,891 distinct states found (1,433,735 ds/min), 76,324,212 states left on queue.
+Progress(49) at 2024-11-07 09:59:15: 32,296,829,379 states generated (28,501,560 s/min), 2,309,831,948 distinct states found (1,395,057 ds/min), 75,779,735 states left on queue.
+Progress(49) at 2024-11-07 10:00:15: 32,325,628,397 states generated (28,799,018 s/min), 2,311,380,882 distinct states found (1,548,934 ds/min), 75,395,162 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 10:01:15)
+Progress(49) at 2024-11-07 10:01:15: 32,354,681,149 states generated (29,052,752 s/min), 2,312,867,979 distinct states found (1,487,097 ds/min), 74,928,503 states left on queue.
+Progress(49) at 2024-11-07 10:02:15: 32,383,406,034 states generated (28,724,885 s/min), 2,314,202,265 distinct states found (1,334,286 ds/min), 74,352,680 states left on queue.
+Progress(49) at 2024-11-07 10:03:15: 32,411,997,317 states generated (28,591,283 s/min), 2,315,435,082 distinct states found (1,232,817 ds/min), 73,700,708 states left on queue.
+Progress(49) at 2024-11-07 10:04:15: 32,440,769,297 states generated (28,771,980 s/min), 2,316,687,791 distinct states found (1,252,709 ds/min), 73,114,003 states left on queue.
+Progress(49) at 2024-11-07 10:05:15: 32,469,733,062 states generated (28,963,765 s/min), 2,317,885,762 distinct states found (1,197,971 ds/min), 72,558,372 states left on queue.
+Progress(49) at 2024-11-07 10:06:15: 32,498,863,740 states generated (29,130,678 s/min), 2,319,353,511 distinct states found (1,467,749 ds/min), 72,186,248 states left on queue.
+Progress(49) at 2024-11-07 10:07:15: 32,527,902,407 states generated (29,038,667 s/min), 2,320,635,445 distinct states found (1,281,934 ds/min), 71,639,893 states left on queue.
+Progress(49) at 2024-11-07 10:08:15: 32,556,361,400 states generated (28,458,993 s/min), 2,321,793,726 distinct states found (1,158,281 ds/min), 70,954,333 states left on queue.
+Progress(49) at 2024-11-07 10:09:15: 32,585,056,251 states generated (28,694,851 s/min), 2,323,009,155 distinct states found (1,215,429 ds/min), 70,362,671 states left on queue.
+Progress(49) at 2024-11-07 10:10:15: 32,613,972,815 states generated (28,916,564 s/min), 2,324,321,084 distinct states found (1,311,929 ds/min), 69,935,186 states left on queue.
+Progress(49) at 2024-11-07 10:11:15: 32,642,963,038 states generated (28,990,223 s/min), 2,325,997,874 distinct states found (1,676,790 ds/min), 69,730,871 states left on queue.
+Progress(49) at 2024-11-07 10:12:15: 32,671,642,762 states generated (28,679,724 s/min), 2,327,294,217 distinct states found (1,296,343 ds/min), 69,221,413 states left on queue.
+Progress(49) at 2024-11-07 10:13:15: 32,700,429,296 states generated (28,786,534 s/min), 2,328,535,742 distinct states found (1,241,525 ds/min), 68,635,066 states left on queue.
+Progress(49) at 2024-11-07 10:14:15: 32,729,076,182 states generated (28,646,886 s/min), 2,329,760,071 distinct states found (1,224,329 ds/min), 67,997,735 states left on queue.
+Progress(49) at 2024-11-07 10:15:15: 32,757,631,787 states generated (28,555,605 s/min), 2,331,002,517 distinct states found (1,242,446 ds/min), 67,379,374 states left on queue.
+Progress(49) at 2024-11-07 10:16:15: 32,786,472,553 states generated (28,840,766 s/min), 2,332,364,440 distinct states found (1,361,923 ds/min), 66,856,953 states left on queue.
+Progress(49) at 2024-11-07 10:17:15: 32,815,068,782 states generated (28,596,229 s/min), 2,333,629,799 distinct states found (1,265,359 ds/min), 66,266,973 states left on queue.
+Progress(49) at 2024-11-07 10:18:15: 32,843,671,035 states generated (28,602,253 s/min), 2,334,875,787 distinct states found (1,245,988 ds/min), 65,714,901 states left on queue.
+Progress(49) at 2024-11-07 10:19:15: 32,872,127,728 states generated (28,456,693 s/min), 2,336,030,334 distinct states found (1,154,547 ds/min), 65,023,805 states left on queue.
+Progress(49) at 2024-11-07 10:20:15: 32,900,582,167 states generated (28,454,439 s/min), 2,337,180,611 distinct states found (1,150,277 ds/min), 64,304,348 states left on queue.
+Progress(49) at 2024-11-07 10:21:15: 32,929,545,972 states generated (28,963,805 s/min), 2,338,488,833 distinct states found (1,308,222 ds/min), 63,715,470 states left on queue.
+Progress(49) at 2024-11-07 10:22:15: 32,958,603,673 states generated (29,057,701 s/min), 2,339,992,330 distinct states found (1,503,497 ds/min), 63,307,968 states left on queue.
+Progress(49) at 2024-11-07 10:23:15: 32,987,442,078 states generated (28,838,405 s/min), 2,341,335,966 distinct states found (1,343,636 ds/min), 62,792,292 states left on queue.
+Progress(49) at 2024-11-07 10:24:15: 33,016,381,018 states generated (28,938,940 s/min), 2,342,828,482 distinct states found (1,492,516 ds/min), 62,365,394 states left on queue.
+Progress(49) at 2024-11-07 10:25:15: 33,045,061,128 states generated (28,680,110 s/min), 2,344,118,515 distinct states found (1,290,033 ds/min), 61,789,542 states left on queue.
+Progress(49) at 2024-11-07 10:26:15: 33,073,888,592 states generated (28,827,464 s/min), 2,345,475,829 distinct states found (1,357,314 ds/min), 61,253,128 states left on queue.
+Progress(50) at 2024-11-07 10:27:15: 33,102,491,050 states generated (28,602,458 s/min), 2,346,652,625 distinct states found (1,176,796 ds/min), 60,570,177 states left on queue.
+Progress(50) at 2024-11-07 10:28:15: 33,131,166,035 states generated (28,674,985 s/min), 2,347,941,873 distinct states found (1,289,248 ds/min), 59,969,815 states left on queue.
+Progress(50) at 2024-11-07 10:29:15: 33,160,270,838 states generated (29,104,803 s/min), 2,349,441,004 distinct states found (1,499,131 ds/min), 59,570,847 states left on queue.
+Progress(50) at 2024-11-07 10:30:15: 33,189,149,869 states generated (28,879,031 s/min), 2,350,812,706 distinct states found (1,371,702 ds/min), 59,068,202 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 10:31:16)
+Progress(50) at 2024-11-07 10:31:16: 33,218,286,121 states generated (29,136,252 s/min), 2,352,357,375 distinct states found (1,544,669 ds/min), 58,692,343 states left on queue.
+Progress(50) at 2024-11-07 10:32:16: 33,246,927,616 states generated (28,641,495 s/min), 2,353,796,993 distinct states found (1,439,618 ds/min), 58,245,674 states left on queue.
+Progress(50) at 2024-11-07 10:33:16: 33,275,692,609 states generated (28,764,993 s/min), 2,355,282,278 distinct states found (1,485,285 ds/min), 57,825,713 states left on queue.
+Progress(50) at 2024-11-07 10:34:16: 33,304,267,545 states generated (28,574,936 s/min), 2,356,681,270 distinct states found (1,398,992 ds/min), 57,325,849 states left on queue.
+Progress(50) at 2024-11-07 10:35:16: 33,332,888,163 states generated (28,620,618 s/min), 2,358,099,683 distinct states found (1,418,413 ds/min), 56,833,993 states left on queue.
+Progress(50) at 2024-11-07 10:36:16: 33,361,236,042 states generated (28,347,879 s/min), 2,359,281,358 distinct states found (1,181,675 ds/min), 56,126,890 states left on queue.
+Progress(50) at 2024-11-07 10:37:16: 33,390,140,655 states generated (28,904,613 s/min), 2,360,868,517 distinct states found (1,587,159 ds/min), 55,791,859 states left on queue.
+Progress(50) at 2024-11-07 10:38:16: 33,418,998,816 states generated (28,858,161 s/min), 2,362,363,780 distinct states found (1,495,263 ds/min), 55,385,255 states left on queue.
+Progress(50) at 2024-11-07 10:39:16: 33,447,612,810 states generated (28,613,994 s/min), 2,363,728,858 distinct states found (1,365,078 ds/min), 54,854,942 states left on queue.
+Progress(50) at 2024-11-07 10:40:16: 33,476,162,070 states generated (28,549,260 s/min), 2,365,099,267 distinct states found (1,370,409 ds/min), 54,312,039 states left on queue.
+Progress(50) at 2024-11-07 10:41:16: 33,504,811,505 states generated (28,649,435 s/min), 2,366,473,549 distinct states found (1,374,282 ds/min), 53,784,809 states left on queue.
+Progress(50) at 2024-11-07 10:42:16: 33,533,403,252 states generated (28,591,747 s/min), 2,367,734,253 distinct states found (1,260,704 ds/min), 53,158,819 states left on queue.
+Progress(50) at 2024-11-07 10:43:16: 33,561,952,889 states generated (28,549,637 s/min), 2,368,855,124 distinct states found (1,120,871 ds/min), 52,441,471 states left on queue.
+Progress(50) at 2024-11-07 10:44:16: 33,590,825,690 states generated (28,872,801 s/min), 2,370,054,403 distinct states found (1,199,279 ds/min), 51,878,202 states left on queue.
+Progress(50) at 2024-11-07 10:45:16: 33,619,895,477 states generated (29,069,787 s/min), 2,371,355,035 distinct states found (1,300,632 ds/min), 51,382,836 states left on queue.
+Progress(50) at 2024-11-07 10:46:16: 33,648,391,719 states generated (28,496,242 s/min), 2,372,441,699 distinct states found (1,086,664 ds/min), 50,647,071 states left on queue.
+Progress(50) at 2024-11-07 10:47:16: 33,677,074,147 states generated (28,682,428 s/min), 2,373,600,507 distinct states found (1,158,808 ds/min), 50,052,421 states left on queue.
+Progress(50) at 2024-11-07 10:48:16: 33,705,980,713 states generated (28,906,566 s/min), 2,375,050,402 distinct states found (1,449,895 ds/min), 49,692,912 states left on queue.
+Progress(50) at 2024-11-07 10:49:16: 33,734,700,309 states generated (28,719,596 s/min), 2,376,355,805 distinct states found (1,305,403 ds/min), 49,202,990 states left on queue.
+Progress(50) at 2024-11-07 10:50:16: 33,763,294,505 states generated (28,594,196 s/min), 2,377,489,014 distinct states found (1,133,209 ds/min), 48,526,991 states left on queue.
+Progress(50) at 2024-11-07 10:51:16: 33,791,781,835 states generated (28,487,330 s/min), 2,378,610,114 distinct states found (1,121,100 ds/min), 47,806,234 states left on queue.
+Progress(50) at 2024-11-07 10:52:16: 33,820,496,936 states generated (28,715,101 s/min), 2,379,861,294 distinct states found (1,251,180 ds/min), 47,194,112 states left on queue.
+Progress(50) at 2024-11-07 10:53:16: 33,848,955,580 states generated (28,458,644 s/min), 2,381,018,247 distinct states found (1,156,953 ds/min), 46,544,595 states left on queue.
+Progress(50) at 2024-11-07 10:54:16: 33,877,358,985 states generated (28,403,405 s/min), 2,382,084,162 distinct states found (1,065,915 ds/min), 45,797,353 states left on queue.
+Progress(50) at 2024-11-07 10:55:16: 33,905,938,026 states generated (28,579,041 s/min), 2,383,237,725 distinct states found (1,153,563 ds/min), 45,079,182 states left on queue.
+Progress(50) at 2024-11-07 10:56:16: 33,934,925,952 states generated (28,987,926 s/min), 2,384,648,770 distinct states found (1,411,045 ds/min), 44,602,865 states left on queue.
+Progress(50) at 2024-11-07 10:57:16: 33,963,625,658 states generated (28,699,706 s/min), 2,385,892,826 distinct states found (1,244,056 ds/min), 44,000,281 states left on queue.
+Progress(50) at 2024-11-07 10:58:16: 33,992,548,128 states generated (28,922,470 s/min), 2,387,290,030 distinct states found (1,397,204 ds/min), 43,514,140 states left on queue.
+Progress(51) at 2024-11-07 10:59:16: 34,021,202,960 states generated (28,654,832 s/min), 2,388,511,227 distinct states found (1,221,197 ds/min), 42,867,785 states left on queue.
+Progress(51) at 2024-11-07 11:00:16: 34,049,640,853 states generated (28,437,893 s/min), 2,389,565,989 distinct states found (1,054,762 ds/min), 42,084,713 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 11:01:17)
+Progress(51) at 2024-11-07 11:01:17: 34,079,102,421 states generated (29,461,568 s/min), 2,391,039,395 distinct states found (1,473,406 ds/min), 41,644,463 states left on queue.
+Progress(51) at 2024-11-07 11:02:17: 34,107,932,294 states generated (28,829,873 s/min), 2,392,415,920 distinct states found (1,376,525 ds/min), 41,153,999 states left on queue.
+Progress(51) at 2024-11-07 11:03:17: 34,136,619,823 states generated (28,687,529 s/min), 2,393,784,341 distinct states found (1,368,421 ds/min), 40,648,398 states left on queue.
+Progress(51) at 2024-11-07 11:04:17: 34,165,416,573 states generated (28,796,750 s/min), 2,395,186,568 distinct states found (1,402,227 ds/min), 40,162,223 states left on queue.
+Progress(51) at 2024-11-07 11:05:17: 34,193,934,145 states generated (28,517,572 s/min), 2,396,461,207 distinct states found (1,274,639 ds/min), 39,558,749 states left on queue.
+Progress(51) at 2024-11-07 11:06:17: 34,222,437,146 states generated (28,503,001 s/min), 2,397,667,005 distinct states found (1,205,798 ds/min), 38,877,170 states left on queue.
+Progress(51) at 2024-11-07 11:07:17: 34,251,162,633 states generated (28,725,487 s/min), 2,399,047,586 distinct states found (1,380,581 ds/min), 38,366,536 states left on queue.
+Progress(51) at 2024-11-07 11:08:17: 34,280,005,309 states generated (28,842,676 s/min), 2,400,476,715 distinct states found (1,429,129 ds/min), 37,912,093 states left on queue.
+Progress(51) at 2024-11-07 11:09:17: 34,308,388,681 states generated (28,383,372 s/min), 2,401,648,509 distinct states found (1,171,794 ds/min), 37,215,479 states left on queue.
+Progress(51) at 2024-11-07 11:10:17: 34,337,086,557 states generated (28,697,876 s/min), 2,403,035,913 distinct states found (1,387,404 ds/min), 36,712,331 states left on queue.
+Progress(51) at 2024-11-07 11:11:17: 34,365,565,315 states generated (28,478,758 s/min), 2,404,187,792 distinct states found (1,151,879 ds/min), 36,008,223 states left on queue.
+Progress(51) at 2024-11-07 11:12:17: 34,394,280,845 states generated (28,715,530 s/min), 2,405,264,161 distinct states found (1,076,369 ds/min), 35,318,651 states left on queue.
+Progress(51) at 2024-11-07 11:13:17: 34,423,292,173 states generated (29,011,328 s/min), 2,406,461,030 distinct states found (1,196,869 ds/min), 34,731,310 states left on queue.
+Progress(51) at 2024-11-07 11:14:17: 34,451,717,631 states generated (28,425,458 s/min), 2,407,470,263 distinct states found (1,009,233 ds/min), 33,977,845 states left on queue.
+Progress(51) at 2024-11-07 11:15:17: 34,480,582,848 states generated (28,865,217 s/min), 2,408,844,472 distinct states found (1,374,209 ds/min), 33,563,385 states left on queue.
+Progress(51) at 2024-11-07 11:16:17: 34,509,255,375 states generated (28,672,527 s/min), 2,409,992,223 distinct states found (1,147,751 ds/min), 32,948,371 states left on queue.
+Progress(51) at 2024-11-07 11:17:17: 34,537,627,156 states generated (28,371,781 s/min), 2,411,007,744 distinct states found (1,015,521 ds/min), 32,138,450 states left on queue.
+Progress(51) at 2024-11-07 11:18:17: 34,566,104,650 states generated (28,477,494 s/min), 2,412,094,834 distinct states found (1,087,090 ds/min), 31,405,790 states left on queue.
+Progress(51) at 2024-11-07 11:19:17: 34,594,468,421 states generated (28,363,771 s/min), 2,413,136,514 distinct states found (1,041,680 ds/min), 30,631,648 states left on queue.
+Progress(51) at 2024-11-07 11:20:17: 34,623,282,746 states generated (28,814,325 s/min), 2,414,376,756 distinct states found (1,240,242 ds/min), 30,011,457 states left on queue.
+Progress(51) at 2024-11-07 11:21:17: 34,652,013,328 states generated (28,730,582 s/min), 2,415,631,977 distinct states found (1,255,221 ds/min), 29,420,035 states left on queue.
+Progress(51) at 2024-11-07 11:22:17: 34,680,708,001 states generated (28,694,673 s/min), 2,416,841,149 distinct states found (1,209,172 ds/min), 28,780,239 states left on queue.
+Progress(52) at 2024-11-07 11:23:17: 34,709,197,697 states generated (28,489,696 s/min), 2,417,931,157 distinct states found (1,090,008 ds/min), 28,033,256 states left on queue.
+Progress(52) at 2024-11-07 11:24:17: 34,738,057,742 states generated (28,860,045 s/min), 2,419,214,866 distinct states found (1,283,709 ds/min), 27,476,210 states left on queue.
+Progress(52) at 2024-11-07 11:25:17: 34,766,795,719 states generated (28,737,977 s/min), 2,420,575,203 distinct states found (1,360,337 ds/min), 26,973,510 states left on queue.
+Progress(52) at 2024-11-07 11:26:17: 34,795,409,801 states generated (28,614,082 s/min), 2,421,852,170 distinct states found (1,276,967 ds/min), 26,383,152 states left on queue.
+Progress(52) at 2024-11-07 11:27:17: 34,823,871,413 states generated (28,461,612 s/min), 2,423,018,118 distinct states found (1,165,948 ds/min), 25,687,358 states left on queue.
+Progress(52) at 2024-11-07 11:28:17: 34,852,452,267 states generated (28,580,854 s/min), 2,424,258,491 distinct states found (1,240,373 ds/min), 25,061,677 states left on queue.
+Progress(52) at 2024-11-07 11:29:17: 34,881,109,110 states generated (28,656,843 s/min), 2,425,536,450 distinct states found (1,277,959 ds/min), 24,485,682 states left on queue.
+Progress(52) at 2024-11-07 11:30:17: 34,909,638,357 states generated (28,529,247 s/min), 2,426,766,241 distinct states found (1,229,791 ds/min), 23,851,800 states left on queue.
+Checkpointing of run states/24-11-06-15-30-45.354
+Checkpointing completed at (2024-11-07 11:31:18)
+Progress(52) at 2024-11-07 11:31:18: 34,938,217,205 states generated (28,578,848 s/min), 2,427,804,784 distinct states found (1,038,543 ds/min), 23,061,400 states left on queue.
+Progress(52) at 2024-11-07 11:32:18: 34,967,089,391 states generated (28,872,186 s/min), 2,428,907,251 distinct states found (1,102,467 ds/min), 22,421,037 states left on queue.
+Progress(52) at 2024-11-07 11:33:18: 34,995,531,710 states generated (28,442,319 s/min), 2,429,963,142 distinct states found (1,055,891 ds/min), 21,740,235 states left on queue.
+Progress(52) at 2024-11-07 11:34:18: 35,024,141,172 states generated (28,609,462 s/min), 2,431,122,150 distinct states found (1,159,008 ds/min), 21,149,288 states left on queue.
+Progress(52) at 2024-11-07 11:35:18: 35,052,351,960 states generated (28,210,788 s/min), 2,432,077,858 distinct states found (955,708 ds/min), 20,295,072 states left on queue.
+Progress(52) at 2024-11-07 11:36:18: 35,080,654,028 states generated (28,302,068 s/min), 2,433,061,991 distinct states found (984,133 ds/min), 19,478,746 states left on queue.
+Progress(52) at 2024-11-07 11:37:18: 35,109,293,099 states generated (28,639,071 s/min), 2,434,258,110 distinct states found (1,196,119 ds/min), 18,850,062 states left on queue.
+Progress(53) at 2024-11-07 11:38:18: 35,137,874,307 states generated (28,581,208 s/min), 2,435,408,538 distinct states found (1,150,428 ds/min), 18,171,042 states left on queue.
+Progress(53) at 2024-11-07 11:39:18: 35,166,493,712 states generated (28,619,405 s/min), 2,436,567,034 distinct states found (1,158,496 ds/min), 17,510,811 states left on queue.
+Progress(53) at 2024-11-07 11:40:18: 35,195,076,188 states generated (28,582,476 s/min), 2,437,810,887 distinct states found (1,243,853 ds/min), 16,916,098 states left on queue.
+Progress(53) at 2024-11-07 11:41:18: 35,223,492,769 states generated (28,416,581 s/min), 2,438,939,934 distinct states found (1,129,047 ds/min), 16,200,301 states left on queue.
+Progress(53) at 2024-11-07 11:42:18: 35,252,026,035 states generated (28,533,266 s/min), 2,440,130,151 distinct states found (1,190,217 ds/min), 15,545,447 states left on queue.
+Progress(53) at 2024-11-07 11:43:18: 35,280,482,465 states generated (28,456,430 s/min), 2,441,297,027 distinct states found (1,166,876 ds/min), 14,879,990 states left on queue. +Progress(53) at 2024-11-07 11:44:18: 35,308,940,796 states generated (28,458,331 s/min), 2,442,317,453 distinct states found (1,020,426 ds/min), 14,116,803 states left on queue. +Progress(53) at 2024-11-07 11:45:18: 35,337,597,306 states generated (28,656,510 s/min), 2,443,328,791 distinct states found (1,011,338 ds/min), 13,403,307 states left on queue. +Progress(53) at 2024-11-07 11:46:18: 35,366,058,165 states generated (28,460,859 s/min), 2,444,336,498 distinct states found (1,007,707 ds/min), 12,657,418 states left on queue. +Progress(53) at 2024-11-07 11:47:18: 35,394,499,327 states generated (28,441,162 s/min), 2,445,346,072 distinct states found (1,009,574 ds/min), 11,856,670 states left on queue. +Progress(53) at 2024-11-07 11:48:18: 35,423,058,448 states generated (28,559,121 s/min), 2,446,449,527 distinct states found (1,103,455 ds/min), 11,150,850 states left on queue. +Progress(54) at 2024-11-07 11:49:18: 35,451,714,950 states generated (28,656,502 s/min), 2,447,608,246 distinct states found (1,158,719 ds/min), 10,497,489 states left on queue. +Progress(54) at 2024-11-07 11:50:18: 35,480,075,027 states generated (28,360,077 s/min), 2,448,668,413 distinct states found (1,060,167 ds/min), 9,734,924 states left on queue. +Progress(54) at 2024-11-07 11:51:18: 35,508,544,241 states generated (28,469,214 s/min), 2,449,793,995 distinct states found (1,125,582 ds/min), 9,041,108 states left on queue. +Progress(54) at 2024-11-07 11:52:18: 35,537,058,894 states generated (28,514,653 s/min), 2,450,835,560 distinct states found (1,041,565 ds/min), 8,304,357 states left on queue. +Progress(54) at 2024-11-07 11:53:18: 35,565,617,770 states generated (28,558,876 s/min), 2,451,805,307 distinct states found (969,747 ds/min), 7,554,593 states left on queue. +Progress(54) at 2024-11-07 11:54:18: 35,594,096,319 states generated (28,478,549 s/min), 2,452,829,286 distinct states found (1,023,979 ds/min), 6,777,854 states left on queue. +Progress(55) at 2024-11-07 11:55:18: 35,622,658,049 states generated (28,561,730 s/min), 2,453,911,213 distinct states found (1,081,927 ds/min), 6,063,348 states left on queue. +Progress(55) at 2024-11-07 11:56:18: 35,651,019,108 states generated (28,361,059 s/min), 2,454,944,844 distinct states found (1,033,631 ds/min), 5,290,297 states left on queue. +Progress(55) at 2024-11-07 11:57:18: 35,679,577,103 states generated (28,557,995 s/min), 2,455,941,484 distinct states found (996,640 ds/min), 4,540,257 states left on queue. +Progress(55) at 2024-11-07 11:58:18: 35,708,050,230 states generated (28,473,127 s/min), 2,456,911,566 distinct states found (970,082 ds/min), 3,737,722 states left on queue. +Progress(55) at 2024-11-07 11:59:18: 35,736,484,911 states generated (28,434,681 s/min), 2,457,942,176 distinct states found (1,030,610 ds/min), 2,980,348 states left on queue. +Progress(56) at 2024-11-07 12:00:18: 35,765,029,620 states generated (28,544,709 s/min), 2,458,911,346 distinct states found (969,170 ds/min), 2,201,353 states left on queue. +Checkpointing of run states/24-11-06-15-30-45.354 +Checkpointing completed at (2024-11-07 12:01:18) +Progress(57) at 2024-11-07 12:01:18: 35,793,733,161 states generated (28,703,541 s/min), 2,459,897,228 distinct states found (985,882 ds/min), 1,411,705 states left on queue. 
+Progress(58) at 2024-11-07 12:02:18: 35,822,110,432 states generated (28,377,271 s/min), 2,460,820,961 distinct states found (923,733 ds/min), 587,430 states left on queue. +Model checking completed. No error has been found. + Estimates of the probability that TLC did not check all reachable states + because two distinct states had the same fingerprint: + calculated (optimistic): val = 4.5 + based on the actual fingerprints: val = .25 +35840434685 states generated, 2461362509 distinct states found, 0 states left on queue. +The depth of the complete state graph search is 67. +The average outdegree of the complete state graph is 1 (minimum is 0, the maximum 8 and the 95th percentile is 2). +Finished in 20h 32min at (2024-11-07 12:03:02) diff --git a/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a5_t2_l2.cfg-2024-11-06--12-09-32.log b/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a5_t2_l2.cfg-2024-11-06--12-09-32.log new file mode 100644 index 000000000000..c43d52302b3c --- /dev/null +++ b/safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a5_t2_l2.cfg-2024-11-06--12-09-32.log @@ -0,0 +1,89 @@ +git revision: 864f4667d +Platform: Linux neon-dev-arm64-1 6.8.0-48-generic #48-Ubuntu SMP PREEMPT_DYNAMIC Fri Sep 27 14:35:45 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux +CPU Info Linux: Neoverse-N1 +CPU Cores Linux: 80 +CPU Info Mac: +CPU Cores Mac: +Spec: MCProposerAcceptorStatic.tla +Config: models/MCProposerAcceptorStatic_p2_a5_t2_l2.cfg +---- +CONSTANTS +NULL = NULL +proposers = {p1, p2} +acceptors = {a1, a2, a3, a4, a5} +max_term = 2 +max_entries = 2 +SPECIFICATION Spec +CONSTRAINT StateConstraint +INVARIANT +TypeOk +ElectionSafety +LogIsMonotonic +LogSafety +SYMMETRY ProposerAcceptorSymmetry +CHECK_DEADLOCK FALSE +ALIAS Alias + +---- + +TLC2 Version 2.20 of Day Month 20?? (rev: f68cb71) +Running breadth-first search Model-Checking with fp 90 and seed 2164066158568118414 with 80 workers on 80 cores with 54613MB heap and 61440MB offheap memory [pid: 30788] (Linux 6.8.0-48-generic aarch64, Ubuntu 21.0.4 x86_64, OffHeapDiskFPSet, DiskStateQueue). 
+Parsing file /home/arseny/neon/safekeeper/spec/MCProposerAcceptorStatic.tla +Parsing file /tmp/tlc-13824636513165485309/TLC.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLC.tla) +Parsing file /home/arseny/neon/safekeeper/spec/ProposerAcceptorStatic.tla +Parsing file /tmp/tlc-13824636513165485309/_TLCTrace.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla) +Parsing file /tmp/tlc-13824636513165485309/Integers.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Integers.tla) +Parsing file /tmp/tlc-13824636513165485309/Sequences.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla) +Parsing file /tmp/tlc-13824636513165485309/FiniteSets.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla) +Parsing file /tmp/tlc-13824636513165485309/Naturals.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla) +Parsing file /tmp/tlc-13824636513165485309/TLCExt.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla) +Semantic processing of module Naturals +Semantic processing of module Sequences +Semantic processing of module FiniteSets +Semantic processing of module TLC +Semantic processing of module Integers +Semantic processing of module ProposerAcceptorStatic +Semantic processing of module TLCExt +Semantic processing of module _TLCTrace +Semantic processing of module MCProposerAcceptorStatic +Starting... (2024-11-06 12:09:33) +Computing initial states... +Finished computing initial states: 1 distinct state generated at 2024-11-06 12:09:36. +Progress(16) at 2024-11-06 12:09:39: 405,675 states generated (405,675 s/min), 18,042 distinct states found (18,042 ds/min), 7,612 states left on queue. +Progress(23) at 2024-11-06 12:10:39: 12,449,257 states generated (12,043,582 s/min), 467,293 distinct states found (449,251 ds/min), 161,057 states left on queue. +Progress(25) at 2024-11-06 12:11:39: 24,461,332 states generated (12,012,075 s/min), 861,011 distinct states found (393,718 ds/min), 267,072 states left on queue. +Progress(26) at 2024-11-06 12:12:39: 36,440,377 states generated (11,979,045 s/min), 1,234,052 distinct states found (373,041 ds/min), 355,372 states left on queue. +Progress(26) at 2024-11-06 12:13:39: 48,327,873 states generated (11,887,496 s/min), 1,583,736 distinct states found (349,684 ds/min), 425,209 states left on queue. +Progress(27) at 2024-11-06 12:14:39: 60,246,136 states generated (11,918,263 s/min), 1,933,499 distinct states found (349,763 ds/min), 494,269 states left on queue. +Progress(28) at 2024-11-06 12:15:39: 71,977,716 states generated (11,731,580 s/min), 2,265,302 distinct states found (331,803 ds/min), 553,777 states left on queue. +Progress(28) at 2024-11-06 12:16:39: 83,644,537 states generated (11,666,821 s/min), 2,575,451 distinct states found (310,149 ds/min), 594,142 states left on queue. +Progress(29) at 2024-11-06 12:17:39: 95,287,089 states generated (11,642,552 s/min), 2,888,793 distinct states found (313,342 ds/min), 639,273 states left on queue. +Progress(29) at 2024-11-06 12:18:39: 107,000,972 states generated (11,713,883 s/min), 3,194,255 distinct states found (305,462 ds/min), 673,353 states left on queue. +Progress(29) at 2024-11-06 12:19:39: 118,305,248 states generated (11,304,276 s/min), 3,467,775 distinct states found (273,520 ds/min), 692,915 states left on queue. 
+Progress(29) at 2024-11-06 12:20:39: 129,954,327 states generated (11,649,079 s/min), 3,763,186 distinct states found (295,411 ds/min), 720,349 states left on queue. +Progress(29) at 2024-11-06 12:21:39: 141,251,359 states generated (11,297,032 s/min), 4,020,407 distinct states found (257,221 ds/min), 724,036 states left on queue. +Progress(30) at 2024-11-06 12:22:39: 152,551,873 states generated (11,300,514 s/min), 4,284,278 distinct states found (263,871 ds/min), 733,726 states left on queue. +Progress(30) at 2024-11-06 12:23:39: 164,324,788 states generated (11,772,915 s/min), 4,569,569 distinct states found (285,291 ds/min), 746,476 states left on queue. +Progress(30) at 2024-11-06 12:24:39: 175,121,317 states generated (10,796,529 s/min), 4,779,505 distinct states found (209,936 ds/min), 723,070 states left on queue. +Progress(31) at 2024-11-06 12:25:39: 186,238,236 states generated (11,116,919 s/min), 5,016,034 distinct states found (236,529 ds/min), 712,944 states left on queue. +Progress(31) at 2024-11-06 12:26:39: 197,884,578 states generated (11,646,342 s/min), 5,276,094 distinct states found (260,060 ds/min), 705,471 states left on queue. +Progress(31) at 2024-11-06 12:27:39: 208,535,096 states generated (10,650,518 s/min), 5,463,450 distinct states found (187,356 ds/min), 665,661 states left on queue. +Progress(32) at 2024-11-06 12:28:39: 219,424,829 states generated (10,889,733 s/min), 5,673,673 distinct states found (210,223 ds/min), 637,975 states left on queue. +Progress(32) at 2024-11-06 12:29:39: 230,906,372 states generated (11,481,543 s/min), 5,903,516 distinct states found (229,843 ds/min), 606,255 states left on queue. +Progress(33) at 2024-11-06 12:30:39: 241,261,887 states generated (10,355,515 s/min), 6,065,731 distinct states found (162,215 ds/min), 552,728 states left on queue. +Progress(33) at 2024-11-06 12:31:39: 252,028,921 states generated (10,767,034 s/min), 6,255,487 distinct states found (189,756 ds/min), 509,620 states left on queue. +Progress(33) at 2024-11-06 12:32:39: 262,856,171 states generated (10,827,250 s/min), 6,431,063 distinct states found (175,576 ds/min), 448,834 states left on queue. +Progress(34) at 2024-11-06 12:33:39: 273,211,882 states generated (10,355,711 s/min), 6,586,644 distinct states found (155,581 ds/min), 386,905 states left on queue. +Progress(34) at 2024-11-06 12:34:39: 283,843,415 states generated (10,631,533 s/min), 6,743,916 distinct states found (157,272 ds/min), 315,135 states left on queue. +Progress(35) at 2024-11-06 12:35:39: 293,931,115 states generated (10,087,700 s/min), 6,878,405 distinct states found (134,489 ds/min), 241,126 states left on queue. +Progress(36) at 2024-11-06 12:36:39: 303,903,441 states generated (9,972,326 s/min), 6,996,394 distinct states found (117,989 ds/min), 152,775 states left on queue. +Progress(37) at 2024-11-06 12:37:39: 313,501,886 states generated (9,598,445 s/min), 7,093,031 distinct states found (96,637 ds/min), 54,009 states left on queue. +Model checking completed. No error has been found. + Estimates of the probability that TLC did not check all reachable states + because two distinct states had the same fingerprint: + calculated (optimistic): val = 1.2E-4 + based on the actual fingerprints: val = 2.1E-6 +318172398 states generated, 7127950 distinct states found, 0 states left on queue. +The depth of the complete state graph search is 44. +The average outdegree of the complete state graph is 1 (minimum is 0, the maximum 9 and the 95th percentile is 3). 
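As a sanity check on the "fingerprint collision" summaries in these logs: TLC deduplicates states by a 64-bit fingerprint, so (glossing over TLC's exact formula) the optimistic estimate behaves like a birthday-style bound over the D distinct states and G generated states:

    P ≈ (D × G) / 2^64 = (7,127,950 × 318,172,398) / 2^64 ≈ 1.2E-4

which matches the "calculated (optimistic)" value this run reports below. In the longer 20-hour run above, the same bound saturates (comes out above 1, hence the reported 4.5), which is why its "based on the actual fingerprints" figure is the more meaningful number there.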
+Finished in 28min 43s at (2024-11-06 12:38:16)
diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs
index 3659bcd7e048..4dc7edef371f 100644
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -51,6 +51,11 @@ use utils::{
 #[global_allocator]
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
 
+/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
+#[allow(non_upper_case_globals)]
+#[export_name = "malloc_conf"]
+pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
+
 const PID_FILE_NAME: &str = "safekeeper.pid";
 const ID_FILE_NAME: &str = "safekeeper.id";
diff --git a/safekeeper/src/http/client.rs b/safekeeper/src/http/client.rs
index c56f7880d4f8..a166fc1ab9b0 100644
--- a/safekeeper/src/http/client.rs
+++ b/safekeeper/src/http/client.rs
@@ -8,6 +8,7 @@
 //! etc.
 
 use reqwest::{IntoUrl, Method, StatusCode};
+use std::error::Error as _;
 use utils::{
     http::error::HttpErrorBody,
     id::{NodeId, TenantId, TimelineId},
@@ -26,7 +27,7 @@ pub struct Client {
 #[derive(thiserror::Error, Debug)]
 pub enum Error {
     /// Failed to receive body (reqwest error).
-    #[error("receive body: {0}")]
+    #[error("receive body: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())]
     ReceiveBody(reqwest::Error),
 
     /// Status is not ok, but failed to parse body as `HttpErrorBody`.
diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs
index 28294abdb929..69b775fd7673 100644
--- a/safekeeper/src/http/routes.rs
+++ b/safekeeper/src/http/routes.rs
@@ -14,7 +14,8 @@ use tokio_util::sync::CancellationToken;
 use tracing::{info_span, Instrument};
 use utils::failpoint_support::failpoints_handler;
 use utils::http::endpoint::{
-    profile_cpu_handler, prometheus_metrics_handler, request_span, ChannelWriter,
+    profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span,
+    ChannelWriter,
 };
 use utils::http::request::parse_query_param;
@@ -573,7 +574,8 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
     let mut router = endpoint::make_router();
     if conf.http_auth.is_some() {
         router = router.middleware(auth_middleware(|request| {
-            const ALLOWLIST_ROUTES: &[&str] = &["/v1/status", "/metrics", "/profile/cpu"];
+            const ALLOWLIST_ROUTES: &[&str] =
+                &["/v1/status", "/metrics", "/profile/cpu", "/profile/heap"];
             if ALLOWLIST_ROUTES.contains(&request.uri().path()) {
                 None
             } else {
@@ -594,6 +596,7 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
         .data(auth)
         .get("/metrics", |r| request_span(r, prometheus_metrics_handler))
         .get("/profile/cpu", |r| request_span(r, profile_cpu_handler))
+        .get("/profile/heap", |r| request_span(r, profile_heap_handler))
         .get("/v1/status", |r| request_span(r, status_handler))
         .put("/v1/failpoints", |r| {
             request_span(r, move |r| async {
diff --git a/storage_controller/src/compute_hook.rs b/storage_controller/src/compute_hook.rs
index b63a322b879f..2b2ece3f0271 100644
--- a/storage_controller/src/compute_hook.rs
+++ b/storage_controller/src/compute_hook.rs
@@ -1,3 +1,4 @@
+use std::error::Error as _;
 use std::sync::Arc;
 use std::{collections::HashMap, time::Duration};
 
@@ -172,7 +173,7 @@ struct ComputeHookNotifyRequest {
 #[derive(thiserror::Error, Debug)]
 pub(crate) enum NotifyError {
     // Request was not sent successfully, e.g. transport error
-    #[error("Sending request: {0}")]
+    #[error("Sending request: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())]
     Request(#[from] reqwest::Error),
 
     // Request could not be serviced right now due to ongoing Operation in control plane, but should be possible soon.
     #[error("Control plane tenant busy")]
diff --git a/storage_controller/src/metrics.rs b/storage_controller/src/metrics.rs
index a1f7bc24575b..6d5885eba657 100644
--- a/storage_controller/src/metrics.rs
+++ b/storage_controller/src/metrics.rs
@@ -50,6 +50,12 @@ pub(crate) struct StorageControllerMetricGroup {
     /// Count of how many times we make an optimization change to a tenant's scheduling
     pub(crate) storage_controller_schedule_optimization: measured::Counter,
 
+    /// How many shards are not scheduled into their preferred AZ
+    pub(crate) storage_controller_schedule_az_violation: measured::Gauge,
+
+    /// How many shards would like to reconcile but were blocked by concurrency limits
+    pub(crate) storage_controller_pending_reconciles: measured::Gauge,
+
     /// HTTP request status counters for handled requests
     pub(crate) storage_controller_http_request_status: measured::CounterVec,
 
diff --git a/storage_controller/src/peer_client.rs b/storage_controller/src/peer_client.rs
index 3f8520fe557c..ee4eb55294d1 100644
--- a/storage_controller/src/peer_client.rs
+++ b/storage_controller/src/peer_client.rs
@@ -1,7 +1,9 @@
 use crate::tenant_shard::ObservedState;
 use pageserver_api::shard::TenantShardId;
 use serde::{Deserialize, Serialize};
-use std::{collections::HashMap, time::Duration};
+use std::collections::HashMap;
+use std::error::Error as _;
+use std::time::Duration;
 use tokio_util::sync::CancellationToken;
 
 use hyper::Uri;
@@ -17,11 +19,14 @@ pub(crate) struct PeerClient {
 #[derive(thiserror::Error, Debug)]
 pub(crate) enum StorageControllerPeerError {
-    #[error("failed to deserialize error response with status code {0} at {1}: {2}")]
+    #[error(
+        "failed to deserialize error response with status code {0} at {1}: {2}{}",
+        .2.source().map(|e| format!(": {e}")).unwrap_or_default()
+    )]
     DeserializationError(StatusCode, Url, reqwest::Error),
     #[error("storage controller peer API error ({0}): {1}")]
     ApiError(StatusCode, String),
-    #[error("failed to send HTTP request: {0}")]
+    #[error("failed to send HTTP request: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())]
     SendError(reqwest::Error),
     #[error("Cancelled")]
     Cancelled,
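The error-display changes above (the safekeeper HTTP client, compute_hook, and peer_client) all apply one pattern: reqwest's Display output often omits the underlying cause (connect, DNS, or TLS errors), so the thiserror format strings append std::error::Error::source() manually — which is also why each touched file gains a `use std::error::Error as _;` import. A minimal sketch of the pattern, with a hypothetical DemoError in place of the real enums:

    use std::error::Error as _;

    #[derive(thiserror::Error, Debug)]
    enum DemoError {
        // "{0}" prints reqwest's own message; the extra format argument appends
        // ": <cause>" when a source error is attached, and nothing otherwise.
        #[error("send request: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())]
        Send(#[from] reqwest::Error),
    }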
diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs
index 14cc51240d10..7ca80c7dfeec 100644
--- a/storage_controller/src/persistence.rs
+++ b/storage_controller/src/persistence.rs
@@ -636,6 +636,13 @@ impl Persistence {
             .into_boxed(),
         };
 
+        // Clear generation_pageserver if we are moving into a state where we won't have
+        // any attached pageservers.
+        let input_generation_pageserver = match input_placement_policy {
+            None | Some(PlacementPolicy::Attached(_)) => None,
+            Some(PlacementPolicy::Detached | PlacementPolicy::Secondary) => Some(None),
+        };
+
         #[derive(AsChangeset)]
         #[diesel(table_name = crate::schema::tenant_shards)]
         struct ShardUpdate {
@@ -643,6 +650,7 @@ impl Persistence {
             placement_policy: Option<String>,
             config: Option<String>,
             scheduling_policy: Option<String>,
+            generation_pageserver: Option<Option<i64>>,
         }
 
         let update = ShardUpdate {
@@ -655,6 +663,7 @@ impl Persistence {
                 .map(|c| serde_json::to_string(&c).unwrap()),
             scheduling_policy: input_scheduling_policy
                 .map(|p| serde_json::to_string(&p).unwrap()),
+            generation_pageserver: input_generation_pageserver,
         };
 
         query.set(update).execute(conn)?;
diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index 636ccf11a120..7e4ee53b4cbf 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -44,12 +44,12 @@ use futures::{stream::FuturesUnordered, StreamExt};
 use itertools::Itertools;
 use pageserver_api::{
     controller_api::{
-        MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability, NodeRegisterRequest,
-        NodeSchedulingPolicy, NodeShard, NodeShardResponse, PlacementPolicy, ShardSchedulingPolicy,
-        ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, TenantCreateRequest,
-        TenantCreateResponse, TenantCreateResponseShard, TenantDescribeResponse,
-        TenantDescribeResponseShard, TenantLocateResponse, TenantPolicyRequest,
-        TenantShardMigrateRequest, TenantShardMigrateResponse,
+        AvailabilityZone, MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability,
+        NodeRegisterRequest, NodeSchedulingPolicy, NodeShard, NodeShardResponse, PlacementPolicy,
+        ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse,
+        TenantCreateRequest, TenantCreateResponse, TenantCreateResponseShard,
+        TenantDescribeResponse, TenantDescribeResponseShard, TenantLocateResponse,
+        TenantPolicyRequest, TenantShardMigrateRequest, TenantShardMigrateResponse,
     },
     models::{
         SecondaryProgress, TenantConfigRequest, TimelineArchivalConfigRequest,
@@ -468,6 +468,7 @@ struct ShardSplitParams {
     policy: PlacementPolicy,
     config: TenantConfig,
     shard_ident: ShardIdentity,
+    preferred_az_id: Option<AvailabilityZone>,
 }
 
 // When preparing for a shard split, we may either choose to proceed with the split,
@@ -512,6 +513,9 @@ struct ShardUpdate {
     /// If this is None, generation is not updated.
     generation: Option<Generation>,
+
+    /// If this is None, scheduling policy is not updated.
+    scheduling_policy: Option<ShardSchedulingPolicy>,
 }
 
 enum StopReconciliationsReason {
@@ -788,7 +792,7 @@ impl Service {
             node_list_futs.push({
                 async move {
                     tracing::info!("Scanning shards on node {node}...");
-                    let timeout = Duration::from_secs(1);
+                    let timeout = Duration::from_secs(5);
                     let response = node
                         .with_client_retries(
                             |client| async move { client.list_location_config().await },
@@ -2375,6 +2379,23 @@ impl Service {
             }
         };
 
+        // Ordinarily we do not update scheduling policy, but when making major changes
+        // like detaching or demoting to secondary-only, we need to force the scheduling
+        // mode to Active, or the caller's expected outcome (detach it) will not happen.
+        let scheduling_policy = match req.config.mode {
+            LocationConfigMode::Detached | LocationConfigMode::Secondary => {
+                // Force Active so the detach/demotion actually takes effect (see above).
+ Some(ShardSchedulingPolicy::Active) + } + LocationConfigMode::AttachedMulti + | LocationConfigMode::AttachedSingle + | LocationConfigMode::AttachedStale => { + // While attached, continue to respect whatever the existing scheduling mode is. + None + } + }; + let mut create = true; for (shard_id, shard) in tenants.range_mut(TenantShardId::tenant_range(tenant_id)) { // Saw an existing shard: this is not a creation @@ -2400,6 +2421,7 @@ impl Service { placement_policy: placement_policy.clone(), tenant_config: req.config.tenant_conf.clone(), generation: set_generation, + scheduling_policy, }); } @@ -2496,6 +2518,7 @@ impl Service { placement_policy, tenant_config, generation, + scheduling_policy, } in &updates { self.persistence @@ -2504,7 +2527,7 @@ impl Service { Some(placement_policy.clone()), Some(tenant_config.clone()), *generation, - None, + *scheduling_policy, ) .await?; } @@ -2520,6 +2543,7 @@ impl Service { placement_policy, tenant_config, generation: update_generation, + scheduling_policy, } in updates { let Some(shard) = tenants.get_mut(&tenant_shard_id) else { @@ -2538,6 +2562,10 @@ impl Service { shard.generation = Some(generation); } + if let Some(scheduling_policy) = scheduling_policy { + shard.set_scheduling_policy(scheduling_policy); + } + shard.schedule(scheduler, &mut schedule_context)?; let maybe_waiter = self.maybe_reconcile_shard(shard, nodes); @@ -2991,9 +3019,17 @@ impl Service { let TenantPolicyRequest { placement, - scheduling, + mut scheduling, } = req; + if let Some(PlacementPolicy::Detached | PlacementPolicy::Secondary) = placement { + // When someone configures a tenant to detach, we force the scheduling policy to enable + // this to take effect. + if scheduling.is_none() { + scheduling = Some(ShardSchedulingPolicy::Active); + } + } + self.persistence .update_tenant_shard( TenantFilter::Tenant(tenant_id), @@ -4103,7 +4139,7 @@ impl Service { for parent_id in parent_ids { let child_ids = parent_id.split(new_shard_count); - let (pageserver, generation, policy, parent_ident, config) = { + let (pageserver, generation, policy, parent_ident, config, preferred_az) = { let mut old_state = tenants .remove(&parent_id) .expect("It was present, we just split it"); @@ -4122,6 +4158,7 @@ impl Service { old_state.policy.clone(), old_state.shard, old_state.config.clone(), + old_state.preferred_az().cloned(), ) }; @@ -4154,6 +4191,9 @@ impl Service { }; child_state.generation = Some(generation); child_state.config = config.clone(); + if let Some(preferred_az) = &preferred_az { + child_state.set_preferred_az(preferred_az.clone()); + } // The child's TenantShard::splitting is intentionally left at the default value of Idle, // as at this point in the split process we have succeeded and this part is infallible: @@ -4346,6 +4386,7 @@ impl Service { let mut policy = None; let mut config = None; let mut shard_ident = None; + let mut preferred_az_id = None; // Validate input, and calculate which shards we will create let (old_shard_count, targets) = { @@ -4404,6 +4445,9 @@ impl Service { if config.is_none() { config = Some(shard.config.clone()); } + if preferred_az_id.is_none() { + preferred_az_id = shard.preferred_az().cloned(); + } if tenant_shard_id.shard_count.count() == split_req.new_shard_count { tracing::info!( @@ -4474,6 +4518,7 @@ impl Service { policy, config, shard_ident, + preferred_az_id, }))) } @@ -4496,6 +4541,7 @@ impl Service { policy, config, shard_ident, + preferred_az_id, } = *params; // Drop any secondary locations: pageservers do not support splitting these, and 
in any case the
@@ -4569,7 +4615,7 @@ impl Service {
                 // Scheduling policies and preferred AZ do not carry through to children
                 scheduling_policy: serde_json::to_string(&ShardSchedulingPolicy::default())
                     .unwrap(),
-                preferred_az_id: None,
+                preferred_az_id: preferred_az_id.as_ref().map(|az| az.0.clone()),
             });
         }
 
@@ -4689,47 +4735,6 @@ impl Service {
         let (response, child_locations, waiters) =
             self.tenant_shard_split_commit_inmem(tenant_id, new_shard_count, new_stripe_size);
 
-        // Now that we have scheduled the child shards, attempt to set their preferred AZ
-        // to that of the pageserver they've been attached on.
-        let preferred_azs = {
-            let locked = self.inner.read().unwrap();
-            child_locations
-                .iter()
-                .filter_map(|(tid, node_id, _stripe_size)| {
-                    let az_id = locked
-                        .nodes
-                        .get(node_id)
-                        .map(|n| n.get_availability_zone_id().clone())?;
-
-                    Some((*tid, az_id))
-                })
-                .collect::<Vec<_>>()
-        };
-
-        let updated = self
-            .persistence
-            .set_tenant_shard_preferred_azs(preferred_azs)
-            .await
-            .map_err(|err| {
-                ApiError::InternalServerError(anyhow::anyhow!(
-                    "Failed to persist preferred az ids: {err}"
-                ))
-            });
-
-        match updated {
-            Ok(updated) => {
-                let mut locked = self.inner.write().unwrap();
-                for (tid, az_id) in updated {
-                    if let Some(shard) = locked.tenants.get_mut(&tid) {
-                        shard.set_preferred_az(az_id);
-                    }
-                }
-            }
-            Err(err) => {
-                tracing::warn!("Failed to persist preferred AZs after split: {err}");
-            }
-        }
-
         // Send compute notifications for all the new shards
         let mut failed_notifications = Vec::new();
         for (child_id, child_ps, stripe_size) in child_locations {
@@ -5158,34 +5163,38 @@ impl Service {
             *nodes = Arc::new(nodes_mut);
         }
 
-        for (tenant_shard_id, shard) in tenants {
-            if shard.deref_node(node_id) {
-                // FIXME: we need to build a ScheduleContext that reflects this shard's peers, otherwise
-                // it won't properly do anti-affinity.
-                let mut schedule_context = ScheduleContext::default();
+        for (_tenant_id, mut schedule_context, shards) in
+            TenantShardContextIterator::new(tenants, ScheduleMode::Normal)
+        {
+            for shard in shards {
+                if shard.deref_node(node_id) {
+                    if let Err(e) = shard.schedule(scheduler, &mut schedule_context) {
+                        // TODO: implement force flag to remove a node even if we can't reschedule
+                        // a tenant
+                        tracing::error!(
+                            "Refusing to delete node, shard {} can't be rescheduled: {e}",
+                            shard.tenant_shard_id
+                        );
+                        return Err(e.into());
+                    } else {
+                        tracing::info!(
+                            "Rescheduled shard {} away from node during deletion",
+                            shard.tenant_shard_id
+                        )
+                    }
 
-            if let Err(e) = shard.schedule(scheduler, &mut schedule_context) {
-                // TODO: implement force flag to remove a node even if we can't reschedule
-                // a tenant
-                tracing::error!("Refusing to delete node, shard {tenant_shard_id} can't be rescheduled: {e}");
-                return Err(e.into());
-            } else {
-                tracing::info!(
-                    "Rescheduled shard {tenant_shard_id} away from node during deletion"
-                )
+                    self.maybe_reconcile_shard(shard, nodes);
                 }
 
-            self.maybe_reconcile_shard(shard, nodes);
+                // Here we remove an existing observed location for the node we're removing, and it will
+                // not be re-added by a reconciler's completion because we filter out removed nodes in
+                // process_result.
+                //
+                // Note that we update the shard's observed state _after_ calling maybe_reconcile_shard: that
+                // means any reconciles we spawned will know about the node we're deleting, enabling them
+                // to do live migrations if it's still online.
+                shard.observed.locations.remove(&node_id);
+            }
-
-        // Here we remove an existing observed location for the node we're removing, and it will
-        // not be re-added by a reconciler's completion because we filter out removed nodes in
-        // process_result.
-        //
-        // Note that we update the shard's observed state _after_ calling maybe_reconcile_shard: that
-        // means any reconciles we spawned will know about the node we're deleting, enabling them
-        // to do live migrations if it's still online.
-        shard.observed.locations.remove(&node_id);
         }
 
         scheduler.node_remove(node_id);
@@ -5707,7 +5716,7 @@ impl Service {
         }
 
         match node_policy {
-            NodeSchedulingPolicy::Active | NodeSchedulingPolicy::Pause => {
+            NodeSchedulingPolicy::Active => {
                 self.node_configure(node_id, None, Some(NodeSchedulingPolicy::Draining))
                     .await?;
@@ -6016,14 +6025,33 @@ impl Service {
         let (nodes, tenants, _scheduler) = locked.parts_mut();
         let pageservers = nodes.clone();
 
+        // This function is an efficient place to update lazy statistics, since we are walking
+        // all tenants.
+        let mut pending_reconciles = 0;
+        let mut az_violations = 0;
+
         let mut reconciles_spawned = 0;
         for shard in tenants.values_mut() {
+            // Accumulate scheduling statistics
+            if let (Some(attached), Some(preferred)) =
+                (shard.intent.get_attached(), shard.preferred_az())
+            {
+                let node_az = nodes
+                    .get(attached)
+                    .expect("Nodes exist if referenced")
+                    .get_availability_zone_id();
+                if node_az != preferred {
+                    az_violations += 1;
+                }
+            }
+
             // Skip checking if this shard is already enqueued for reconciliation
             if shard.delayed_reconcile && self.reconciler_concurrency.available_permits() == 0 {
                 // If there is something delayed, then return a nonzero count so that
                 // callers like reconcile_all_now do not incorrectly get the impression
                 // that the system is in a quiescent state.
                 reconciles_spawned = std::cmp::max(1, reconciles_spawned);
+                pending_reconciles += 1;
                 continue;
             }
 
@@ -6031,9 +6059,22 @@ impl Service {
             // dirty, spawn another one
             if self.maybe_reconcile_shard(shard, &pageservers).is_some() {
                 reconciles_spawned += 1;
+            } else if shard.delayed_reconcile {
+                // Shard wanted to reconcile but for some reason couldn't.
+                pending_reconciles += 1;
             }
         }
 
+        metrics::METRICS_REGISTRY
+            .metrics_group
+            .storage_controller_schedule_az_violation
+            .set(az_violations as i64);
+
+        metrics::METRICS_REGISTRY
+            .metrics_group
+            .storage_controller_pending_reconciles
+            .set(pending_reconciles as i64);
+
         reconciles_spawned
     }
@@ -6247,6 +6288,14 @@ impl Service {
                     > DOWNLOAD_FRESHNESS_THRESHOLD
                 {
                     tracing::info!("Skipping migration of {tenant_shard_id} to {node} because secondary isn't ready: {progress:?}");
+
+                    #[cfg(feature = "testing")]
+                    if progress.heatmap_mtime.is_none() {
+                        // No heatmap might mean the attached location has never uploaded one, or that
+                        // the secondary download hasn't happened yet. This is relatively unusual in the field,
+                        // but fairly common in tests.
+                        self.kick_secondary_download(tenant_shard_id).await;
+                    }
                 } else {
                     // Location looks ready: proceed
                     tracing::info!(
@@ -6261,6 +6310,58 @@ impl Service {
         validated_work
     }
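The two detach-related changes above (in the location_conf path and in the tenant policy handler) implement the same rule: a request that detaches a tenant or demotes it to secondary-only also forces the shard scheduling policy back to Active, because a paused policy would otherwise turn the request into a silent no-op. A self-contained sketch of that rule, using local stand-ins rather than the real pageserver_api types:

    // Stand-ins for pageserver_api::controller_api types, so this sketch compiles on its own.
    #[derive(Clone, Copy)]
    pub enum LocationConfigMode { AttachedSingle, AttachedMulti, AttachedStale, Secondary, Detached }
    #[derive(Clone, Copy, Debug, PartialEq)]
    pub enum ShardSchedulingPolicy { Active }

    /// Returns the scheduling policy to force, or None to leave the existing policy untouched.
    pub fn forced_scheduling_policy(mode: LocationConfigMode) -> Option<ShardSchedulingPolicy> {
        match mode {
            // Major state changes must actually be scheduled, so force Active.
            LocationConfigMode::Detached | LocationConfigMode::Secondary => {
                Some(ShardSchedulingPolicy::Active)
            }
            // Attached modes keep whatever policy is already set.
            LocationConfigMode::AttachedSingle
            | LocationConfigMode::AttachedMulti
            | LocationConfigMode::AttachedStale => None,
        }
    }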
+    /// Some aspects of scheduling optimisation wait for secondary locations to be warm. This
+    /// happens on multi-minute timescales in the field, which is fine because optimisation is meant
+    /// to be a lazy background thing. However, when testing, it is not practical to wait around, so
+    /// we have this helper to move things along faster.
+    #[cfg(feature = "testing")]
+    async fn kick_secondary_download(&self, tenant_shard_id: TenantShardId) {
+        let (attached_node, secondary_node) = {
+            let locked = self.inner.read().unwrap();
+            let Some(shard) = locked.tenants.get(&tenant_shard_id) else {
+                return;
+            };
+            let (Some(attached), Some(secondary)) = (
+                shard.intent.get_attached(),
+                shard.intent.get_secondary().first(),
+            ) else {
+                return;
+            };
+            (
+                locked.nodes.get(attached).unwrap().clone(),
+                locked.nodes.get(secondary).unwrap().clone(),
+            )
+        };
+
+        // Make remote API calls to upload + download heatmaps: we ignore errors because this is just
+        // a 'kick' to let scheduling optimisation run more promptly.
+        attached_node
+            .with_client_retries(
+                |client| async move { client.tenant_heatmap_upload(tenant_shard_id).await },
+                &self.config.jwt_token,
+                3,
+                10,
+                SHORT_RECONCILE_TIMEOUT,
+                &self.cancel,
+            )
+            .await;
+
+        secondary_node
+            .with_client_retries(
+                |client| async move {
+                    client
+                        .tenant_secondary_download(tenant_shard_id, Some(Duration::from_secs(1)))
+                        .await
+                },
+                &self.config.jwt_token,
+                3,
+                10,
+                SHORT_RECONCILE_TIMEOUT,
+                &self.cancel,
+            )
+            .await;
+    }
+
     /// Look for shards which are oversized and in need of splitting
     async fn autosplit_tenants(self: &Arc<Self>) {
         let Some(split_threshold) = self.config.split_threshold else {
diff --git a/storage_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs
index 8d855d263cfd..1b4ff01a170a 100644
--- a/storage_scrubber/src/checks.rs
+++ b/storage_scrubber/src/checks.rs
@@ -4,17 +4,21 @@ use itertools::Itertools;
 use pageserver::tenant::checks::check_valid_layermap;
 use pageserver::tenant::layer_map::LayerMap;
 use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
+use pageserver::tenant::remote_timeline_client::manifest::TenantManifest;
 use pageserver_api::shard::ShardIndex;
 use tokio_util::sync::CancellationToken;
 use tracing::{info, warn};
 use utils::generation::Generation;
 use utils::id::TimelineId;
+use utils::shard::TenantShardId;
 
 use crate::cloud_admin_api::BranchData;
 use crate::metadata_stream::stream_listing;
 use crate::{download_object_with_retries, RootTarget, TenantShardTimelineId};
 use futures_util::StreamExt;
-use pageserver::tenant::remote_timeline_client::{parse_remote_index_path, remote_layer_path};
+use pageserver::tenant::remote_timeline_client::{
+    parse_remote_index_path, parse_remote_tenant_manifest_path, remote_layer_path,
+};
 use pageserver::tenant::storage_layer::LayerName;
 use pageserver::tenant::IndexPart;
 use remote_storage::{GenericRemoteStorage, ListingObject, RemotePath};
@@ -527,3 +531,132 @@ async fn list_timeline_blobs_impl(
         unknown_keys,
     }))
 }
+
+pub(crate) struct RemoteTenantManifestInfo {
+    pub(crate) latest_generation: Option<Generation>,
+    pub(crate) manifests: Vec<(Generation, ListingObject)>,
+}
+
+pub(crate) enum ListTenantManifestResult {
+    WithErrors {
+        errors: Vec<(String, String)>,
+        #[allow(dead_code)]
+        unknown_keys: Vec<ListingObject>,
+    },
+    NoErrors(RemoteTenantManifestInfo),
+}
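A hypothetical consumer of the result type just defined, mirroring how gc_tenant_manifests (further below) treats it — listing problems are accumulated and logged rather than fatal, and only a clean scan yields generation data:

    fn newest_manifest_generation(result: ListTenantManifestResult) -> Option<Generation> {
        match result {
            // Errors are reported, not propagated: the scrubber logs and moves on.
            ListTenantManifestResult::WithErrors { errors, .. } => {
                for (key, error) in errors {
                    tracing::warn!("manifest scan issue at {key}: {error}");
                }
                None
            }
            // A clean scan carries all listed manifests plus the parsed latest generation.
            ListTenantManifestResult::NoErrors(info) => info.latest_generation,
        }
    }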
+/// Lists the tenant manifests in remote storage and parses the latest one, returning a [`ListTenantManifestResult`] object.
+pub(crate) async fn list_tenant_manifests(
+    remote_client: &GenericRemoteStorage,
+    tenant_id: TenantShardId,
+    root_target: &RootTarget,
+) -> anyhow::Result<ListTenantManifestResult> {
+    let mut errors = Vec::new();
+    let mut unknown_keys = Vec::new();
+
+    let mut tenant_root_target = root_target.tenant_root(&tenant_id);
+    let original_prefix = tenant_root_target.prefix_in_bucket.clone();
+    const TENANT_MANIFEST_STEM: &str = "tenant-manifest";
+    tenant_root_target.prefix_in_bucket += TENANT_MANIFEST_STEM;
+    tenant_root_target.delimiter = String::new();
+
+    let mut manifests: Vec<(Generation, ListingObject)> = Vec::new();
+
+    let prefix_str = &original_prefix
+        .strip_prefix("/")
+        .unwrap_or(&original_prefix);
+
+    let mut stream = std::pin::pin!(stream_listing(remote_client, &tenant_root_target));
+    'outer: while let Some(obj) = stream.next().await {
+        let (key, Some(obj)) = obj? else {
+            panic!("ListingObject not specified");
+        };
+
+        'err: {
+            // TODO a let chain would be nicer here.
+            let Some(name) = key.object_name() else {
+                break 'err;
+            };
+            if !name.starts_with(TENANT_MANIFEST_STEM) {
+                break 'err;
+            }
+            let Some(generation) = parse_remote_tenant_manifest_path(key.clone()) else {
+                break 'err;
+            };
+            tracing::debug!("tenant manifest {key}");
+            manifests.push((generation, obj));
+            continue 'outer;
+        }
+        tracing::info!("Listed an unknown key: {key}");
+        unknown_keys.push(obj);
+    }
+
+    if manifests.is_empty() {
+        tracing::debug!("No manifest for tenant.");
+
+        return Ok(ListTenantManifestResult::WithErrors {
+            errors,
+            unknown_keys,
+        });
+    }
+    if !unknown_keys.is_empty() {
+        errors.push(((*prefix_str).to_owned(), "unknown keys listed".to_string()));
+
+        return Ok(ListTenantManifestResult::WithErrors {
+            errors,
+            unknown_keys,
+        });
+    }
+
+    // Find the manifest with the highest generation
+    let (latest_generation, latest_listing_object) = manifests
+        .iter()
+        .max_by_key(|i| i.0)
+        .map(|(g, obj)| (*g, obj.clone()))
+        .unwrap();
+
+    let manifest_bytes =
+        match download_object_with_retries(remote_client, &latest_listing_object.key).await {
+            Ok(bytes) => bytes,
+            Err(e) => {
+                // It is possible that the tenant got deleted between the time we listed
+                // the objects and the time we downloaded the manifest file.
+ errors.push(( + latest_listing_object.key.get_path().as_str().to_owned(), + format!("failed to download tenant-manifest.json: {e}"), + )); + return Ok(ListTenantManifestResult::WithErrors { + errors, + unknown_keys, + }); + } + }; + + match TenantManifest::from_json_bytes(&manifest_bytes) { + Ok(_manifest) => { + return Ok(ListTenantManifestResult::NoErrors( + RemoteTenantManifestInfo { + latest_generation: Some(latest_generation), + manifests, + }, + )); + } + Err(parse_error) => errors.push(( + latest_listing_object.key.get_path().as_str().to_owned(), + format!("tenant-manifest.json body parsing error: {parse_error}"), + )), + } + + if errors.is_empty() { + errors.push(( + (*prefix_str).to_owned(), + "Unexpected: no errors did not lead to a successfully parsed blob return".to_string(), + )); + } + + Ok(ListTenantManifestResult::WithErrors { + errors, + unknown_keys, + }) +} diff --git a/storage_scrubber/src/cloud_admin_api.rs b/storage_scrubber/src/cloud_admin_api.rs index c9a62cd256d7..b1dfe3a53f28 100644 --- a/storage_scrubber/src/cloud_admin_api.rs +++ b/storage_scrubber/src/cloud_admin_api.rs @@ -1,3 +1,5 @@ +use std::error::Error as _; + use chrono::{DateTime, Utc}; use futures::Future; use hex::FromHex; @@ -30,14 +32,18 @@ impl std::fmt::Display for Error { match &self.kind { ErrorKind::RequestSend(e) => write!( f, - "Failed to send a request. Context: {}, error: {}", - self.context, e + "Failed to send a request. Context: {}, error: {}{}", + self.context, + e, + e.source().map(|e| format!(": {e}")).unwrap_or_default() ), ErrorKind::BodyRead(e) => { write!( f, - "Failed to read a request body. Context: {}, error: {}", - self.context, e + "Failed to read a request body. Context: {}, error: {}{}", + self.context, + e, + e.source().map(|e| format!(": {e}")).unwrap_or_default() ) } ErrorKind::ResponseStatus(status) => { diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs index 1fe4fc58cd0e..be526daaf0d1 100644 --- a/storage_scrubber/src/lib.rs +++ b/storage_scrubber/src/lib.rs @@ -268,7 +268,7 @@ impl BucketConfig { config.bucket_name, config.bucket_region ), RemoteStorageKind::AzureContainer(config) => format!( - "bucket {}, storage account {:?}, region {}", + "container {}, storage account {:?}, region {}", config.container_name, config.storage_account, config.container_region ), } diff --git a/storage_scrubber/src/pageserver_physical_gc.rs b/storage_scrubber/src/pageserver_physical_gc.rs index 1e69ddbf150c..20cb9c3633ac 100644 --- a/storage_scrubber/src/pageserver_physical_gc.rs +++ b/storage_scrubber/src/pageserver_physical_gc.rs @@ -2,12 +2,16 @@ use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::sync::Arc; use std::time::Duration; -use crate::checks::{list_timeline_blobs, BlobDataParseResult}; +use crate::checks::{ + list_tenant_manifests, list_timeline_blobs, BlobDataParseResult, ListTenantManifestResult, +}; use crate::metadata_stream::{stream_tenant_timelines, stream_tenants}; use crate::{init_remote, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId, MAX_RETRIES}; use futures_util::{StreamExt, TryStreamExt}; use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; -use pageserver::tenant::remote_timeline_client::{parse_remote_index_path, remote_layer_path}; +use pageserver::tenant::remote_timeline_client::{ + parse_remote_index_path, parse_remote_tenant_manifest_path, remote_layer_path, +}; use pageserver::tenant::storage_layer::LayerName; use pageserver::tenant::IndexPart; use 
pageserver_api::controller_api::TenantDescribeResponse;
@@ -25,6 +29,7 @@ use utils::id::{TenantId, TenantTimelineId};
 
 #[derive(Serialize, Default)]
 pub struct GcSummary {
     indices_deleted: usize,
+    tenant_manifests_deleted: usize,
     remote_storage_errors: usize,
     controller_api_errors: usize,
     ancestor_layers_deleted: usize,
@@ -34,12 +39,14 @@ impl GcSummary {
     fn merge(&mut self, other: Self) {
         let Self {
             indices_deleted,
+            tenant_manifests_deleted,
             remote_storage_errors,
             ancestor_layers_deleted,
             controller_api_errors,
         } = other;
 
         self.indices_deleted += indices_deleted;
+        self.tenant_manifests_deleted += tenant_manifests_deleted;
         self.remote_storage_errors += remote_storage_errors;
         self.ancestor_layers_deleted += ancestor_layers_deleted;
         self.controller_api_errors += controller_api_errors;
@@ -352,6 +359,69 @@ async fn maybe_delete_index(
     }
 }
 
+async fn maybe_delete_tenant_manifest(
+    remote_client: &GenericRemoteStorage,
+    min_age: &Duration,
+    latest_gen: Generation,
+    obj: &ListingObject,
+    mode: GcMode,
+    summary: &mut GcSummary,
+) {
+    // Validation: we will only delete things that parse cleanly
+    let basename = obj.key.get_path().file_name().unwrap();
+    let Some(candidate_generation) =
+        parse_remote_tenant_manifest_path(RemotePath::from_string(basename).unwrap())
+    else {
+        // A strange key: we will not delete this because we don't understand it.
+        tracing::warn!("Bad tenant manifest key");
+        return;
+    };
+
+    // Validation: we will only delete manifests more than one generation old, and in fact we
+    // should never be called with such recent generations.
+    if candidate_generation >= latest_gen {
+        tracing::warn!("Deletion candidate is >= latest generation, this is a bug!");
+        return;
+    } else if candidate_generation.next() == latest_gen {
+        tracing::warn!("Deletion candidate is >= latest generation - 1, this is a bug!");
+        return;
+    }
+
+    if !is_old_enough(min_age, obj, summary) {
+        return;
+    }
+
+    if matches!(mode, GcMode::DryRun) {
+        tracing::info!("Dry run: would delete this key");
+        return;
+    }
+
+    // All validations passed: erase the object
+    let cancel = CancellationToken::new();
+    match backoff::retry(
+        || remote_client.delete(&obj.key, &cancel),
+        |_| false,
+        3,
+        MAX_RETRIES as u32,
+        "maybe_delete_tenant_manifest",
+        &cancel,
+    )
+    .await
+    {
+        None => {
+            unreachable!("Using a dummy cancellation token");
+        }
+        Some(Ok(_)) => {
+            tracing::info!("Successfully deleted tenant manifest");
+            summary.tenant_manifests_deleted += 1;
+        }
+        Some(Err(e)) => {
+            tracing::warn!("Failed to delete tenant manifest: {e}");
+            summary.remote_storage_errors += 1;
+        }
+    }
+}
+
 #[allow(clippy::too_many_arguments)]
 async fn gc_ancestor(
     remote_client: &GenericRemoteStorage,
@@ -451,13 +521,100 @@ async fn gc_ancestor(
     Ok(())
 }
 
+async fn gc_tenant_manifests(
+    remote_client: &GenericRemoteStorage,
+    min_age: Duration,
+    target: &RootTarget,
+    mode: GcMode,
+    tenant_shard_id: TenantShardId,
+) -> anyhow::Result<GcSummary> {
+    let mut gc_summary = GcSummary::default();
+    match list_tenant_manifests(remote_client, tenant_shard_id, target).await? {
+        ListTenantManifestResult::WithErrors {
+            errors,
+            unknown_keys: _,
+        } => {
+            for (_key, error) in errors {
+                tracing::warn!(%tenant_shard_id, "list_tenant_manifests: {error}");
+            }
+        }
+        ListTenantManifestResult::NoErrors(mut manifest_info) => {
+            let Some(latest_gen) = manifest_info.latest_generation else {
+                return Ok(gc_summary);
+            };
+            manifest_info
+                .manifests
+                .sort_by_key(|(generation, _obj)| *generation);
+            // skip the two latest generations (they don't necessarily have to be 1 apart from each other)
+            let candidates = manifest_info.manifests.iter().rev().skip(2);
+            for (_generation, key) in candidates {
+                maybe_delete_tenant_manifest(
+                    remote_client,
+                    &min_age,
+                    latest_gen,
+                    key,
+                    mode,
+                    &mut gc_summary,
+                )
+                .instrument(
+                    info_span!("maybe_delete_tenant_manifest", %tenant_shard_id, ?latest_gen, %key.key),
+                )
+                .await;
+            }
+        }
+    }
+    Ok(gc_summary)
+}
+
+async fn gc_timeline(
+    remote_client: &GenericRemoteStorage,
+    min_age: &Duration,
+    target: &RootTarget,
+    mode: GcMode,
+    ttid: TenantShardTimelineId,
+    accumulator: &Arc<std::sync::Mutex<TenantRefAccumulator>>,
+) -> anyhow::Result<GcSummary> {
+    let mut summary = GcSummary::default();
+    let data = list_timeline_blobs(remote_client, ttid, target).await?;
+
+    let (index_part, latest_gen, candidates) = match &data.blob_data {
+        BlobDataParseResult::Parsed {
+            index_part,
+            index_part_generation,
+            s3_layers: _s3_layers,
+        } => (index_part, *index_part_generation, data.unused_index_keys),
+        BlobDataParseResult::Relic => {
+            // Post-deletion tenant location: don't try and GC it.
+            return Ok(summary);
+        }
+        BlobDataParseResult::Incorrect {
+            errors,
+            s3_layers: _,
+        } => {
+            // Our primary purpose isn't to report on bad data, but log this rather than skipping silently
+            tracing::warn!("Skipping timeline {ttid}, bad metadata: {errors:?}");
+            return Ok(summary);
+        }
+    };
+
+    accumulator.lock().unwrap().update(ttid, index_part);
+
+    for key in candidates {
+        maybe_delete_index(remote_client, min_age, latest_gen, &key, mode, &mut summary)
+            .instrument(info_span!("maybe_delete_index", %ttid, ?latest_gen, %key.key))
+            .await;
+    }
+
+    Ok(summary)
+}
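The retention rule in gc_tenant_manifests above, spelled out: manifests are sorted by generation, the two newest generations are always kept (they need not be numerically adjacent), and older ones only become deletion candidates once they also pass the min_age check. A stand-alone sketch of the selection step, with plain integers standing in for Generation:

    /// Keep the two newest generations; everything older is a deletion candidate
    /// (age checks are applied to the candidates separately).
    fn deletion_candidates(mut generations: Vec<u32>) -> Vec<u32> {
        generations.sort();
        generations.into_iter().rev().skip(2).collect()
    }

    #[test]
    fn keeps_two_newest() {
        // 9 and 7 survive; 4 and 3 are candidates.
        assert_eq!(deletion_candidates(vec![3, 7, 4, 9]), vec![4, 3]);
    }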
 /// Physical garbage collection: removing unused S3 objects.
 ///
 /// This is distinct from the garbage collection done inside the pageserver, which operates at a higher level
 /// (keys, layers). This type of garbage collection is about removing:
 /// - Objects that were uploaded but never referenced in the remote index (e.g. because of a shutdown between
 ///   uploading a layer and uploading an index)
-/// - Index objects from historic generations
+/// - Index objects and tenant manifests from historic generations
 ///
 /// This type of GC is not necessary for correctness: rather it serves to reduce wasted storage capacity, and
 /// make sure that object listings don't get slowed down by large numbers of garbage objects.
@@ -470,6 +627,7 @@ pub async fn pageserver_physical_gc(
 ) -> anyhow::Result<GcSummary> {
     let (remote_client, target) = init_remote(bucket_config.clone(), NodeKind::Pageserver).await?;
+    let remote_client = Arc::new(remote_client);
 
     let tenants = if tenant_shard_ids.is_empty() {
         futures::future::Either::Left(stream_tenants(&remote_client, &target))
     } else {
@@ -484,59 +642,59 @@ pub async fn pageserver_physical_gc(
     let accumulator = Arc::new(std::sync::Mutex::new(TenantRefAccumulator::default()));
 
     // Generate a stream of TenantTimelineId
-    let timelines = tenants.map_ok(|t| stream_tenant_timelines(&remote_client, &target, t));
-    let timelines = timelines.try_buffered(CONCURRENCY);
-    let timelines = timelines.try_flatten();
-
-    // Generate a stream of S3TimelineBlobData
-    async fn gc_timeline(
-        remote_client: &GenericRemoteStorage,
-        min_age: &Duration,
-        target: &RootTarget,
-        mode: GcMode,
-        ttid: TenantShardTimelineId,
-        accumulator: &Arc<std::sync::Mutex<TenantRefAccumulator>>,
-    ) -> anyhow::Result<GcSummary> {
-        let mut summary = GcSummary::default();
-        let data = list_timeline_blobs(remote_client, ttid, target).await?;
-
-        let (index_part, latest_gen, candidates) = match &data.blob_data {
-            BlobDataParseResult::Parsed {
-                index_part,
-                index_part_generation,
-                s3_layers: _s3_layers,
-            } => (index_part, *index_part_generation, data.unused_index_keys),
-            BlobDataParseResult::Relic => {
-                // Post-deletion tenant location: don't try and GC it.
-                return Ok(summary);
-            }
-            BlobDataParseResult::Incorrect {
-                errors,
-                s3_layers: _,
-            } => {
-                // Our primary purpose isn't to report on bad data, but log this rather than skipping silently
-                tracing::warn!("Skipping timeline {ttid}, bad metadata: {errors:?}");
-                return Ok(summary);
-            }
-        };
-
-        accumulator.lock().unwrap().update(ttid, index_part);
-
-        for key in candidates {
-            maybe_delete_index(remote_client, min_age, latest_gen, &key, mode, &mut summary)
-                .instrument(info_span!("maybe_delete_index", %ttid, ?latest_gen, %key.key))
-                .await;
-        }
-
-        Ok(summary)
+    enum GcSummaryOrContent<T> {
+        Content(T),
+        GcSummary(GcSummary),
     }
+    let timelines = tenants.map_ok(|tenant_shard_id| {
+        let target_ref = &target;
+        let remote_client_ref = &remote_client;
+        async move {
+            let summaries_from_manifests = match gc_tenant_manifests(
+                remote_client_ref,
+                min_age,
+                target_ref,
+                mode,
+                tenant_shard_id,
+            )
+            .await
+            {
+                Ok(gc_summary) => vec![Ok(GcSummaryOrContent::<TenantShardTimelineId>::GcSummary(
+                    gc_summary,
+                ))],
+                Err(e) => {
+                    tracing::warn!(%tenant_shard_id, "Error in gc_tenant_manifests: {e}");
+                    Vec::new()
+                }
+            };
+            stream_tenant_timelines(remote_client_ref, target_ref, tenant_shard_id)
+                .await
+                .map(|stream| {
+                    stream
+                        .map_ok(GcSummaryOrContent::Content)
+                        .chain(futures::stream::iter(summaries_from_manifests.into_iter()))
+                })
+        }
+    });
+    let timelines = std::pin::pin!(timelines.try_buffered(CONCURRENCY));
+    let timelines = timelines.try_flatten();
 
     let mut summary = GcSummary::default();
 
     // Drain futures for per-shard GC, populating accumulator as a side effect
     {
-        let timelines = timelines.map_ok(|ttid| {
-            gc_timeline(&remote_client, &min_age, &target, mode, ttid, &accumulator)
+        let timelines = timelines.map_ok(|summary_or_ttid| match summary_or_ttid {
+            GcSummaryOrContent::Content(ttid) => futures::future::Either::Left(gc_timeline(
+                &remote_client,
+                &min_age,
+                &target,
+                mode,
+                ttid,
+                &accumulator,
+            )),
+            GcSummaryOrContent::GcSummary(gc_summary) => {
+                futures::future::Either::Right(futures::future::ok(gc_summary))
+            }
         });
         let mut timelines = std::pin::pin!(timelines.try_buffered(CONCURRENCY));
diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py
a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py index bb8e75902e77..fa3747c08f29 100644 --- a/test_runner/fixtures/benchmark_fixture.py +++ b/test_runner/fixtures/benchmark_fixture.py @@ -266,6 +266,16 @@ def record( name = f"{self.PROPERTY_PREFIX}_{metric_name}" if labels is None: labels = {} + + # Sometimes mypy can't catch non-numeric values, + # so adding a check here + try: + float(metric_value) + except ValueError as e: + raise ValueError( + f"`metric_value` (`{metric_value}`) must be a NUMERIC-friendly data type" + ) from e + self.property_recorder( name, { diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 3f90c233a635..a591e088eff7 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -152,6 +152,8 @@ def counter(name: str) -> str: "pageserver_resident_physical_size", "pageserver_io_operations_bytes_total", "pageserver_last_record_lsn", + "pageserver_disk_consistent_lsn", + "pageserver_projected_remote_consistent_lsn", "pageserver_standby_horizon", "pageserver_smgr_query_seconds_bucket", "pageserver_smgr_query_seconds_count", @@ -173,6 +175,9 @@ def counter(name: str) -> str: counter("pageserver_tenant_throttling_count_accounted_finish"), counter("pageserver_tenant_throttling_wait_usecs_sum"), counter("pageserver_tenant_throttling_count"), + counter("pageserver_timeline_wal_records_received"), + counter("pageserver_page_service_pagestream_flush_in_progress_micros"), + *histogram("pageserver_page_service_batch_size"), *PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS, # "pageserver_directory_entries_count", -- only used if above a certain threshold # "pageserver_broken_tenants_count" -- used only for broken diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 9bcfffeb9cf5..60c4a2393609 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -269,7 +269,7 @@ async def connect_async(self, **kwargs: Any) -> asyncpg.Connection: for match in re.finditer(r"-c(\w*)=(\w*)", options): key = match.group(1) val = match.group(2) - if "server_options" in conn_options: + if "server_settings" in conn_options: conn_options["server_settings"].update({key: val}) else: conn_options["server_settings"] = {key: val} @@ -1095,6 +1095,17 @@ def __init__(self, config: NeonEnvBuilder): # the pageserver taking a long time to start up due to syncfs flushing other tests' data "no_sync": True, } + + # Batching (https://github.com/neondatabase/neon/issues/9377): + # enable batching by default in tests and benchmarks. + # Compat tests are exempt because old versions fail to parse the new config. 
+        if not config.compatibility_neon_binpath:
+            ps_cfg["page_service_pipelining"] = {
+                "mode": "pipelined",
+                "execution": "concurrent-futures",
+                "max_batch_size": 32,
+            }
+
         if self.pageserver_virtual_file_io_engine is not None:
             ps_cfg["virtual_file_io_engine"] = self.pageserver_virtual_file_io_engine
         if config.pageserver_default_tenant_config_compaction_algorithm is not None:
@@ -1736,7 +1747,7 @@ def wait_until_ready(self):
         def storage_controller_ready():
             assert self.ready() is True

-        wait_until(30, 1, storage_controller_ready)
+        wait_until(storage_controller_ready)
         return time.time() - t1

     def attach_hook_issue(
@@ -2574,7 +2585,7 @@ def complete():
             log.info(f"any_unstable={any_unstable}")
             assert not any_unstable

-        wait_until(20, 0.5, complete)
+        wait_until(complete)

     def __enter__(self) -> Self:
         return self
@@ -3801,13 +3812,11 @@ def create(
         assert size_to_bytes(size) >= size_to_bytes(
             "1MB"
         ), "LFC size cannot be set less than 1MB"
-        # shared_buffers = 512kB to make postgres use LFC intensively
-        # neon.max_file_cache_size and neon.file_cache size limit are
-        # set to 1MB because small LFC is better for testing (helps to find more problems)
         lfc_path_escaped = str(lfc_path).replace("'", "''")
         config_lines = [
-            "shared_buffers = 512kB",
             f"neon.file_cache_path = '{lfc_path_escaped}'",
+            # neon.max_file_cache_size and neon.file_cache_size_limit are
+            # set to 1MB because small LFC is better for testing (helps to find more problems)
             "neon.max_file_cache_size = 1MB",
             "neon.file_cache_size_limit = 1MB",
         ] + config_lines
@@ -3973,7 +3982,7 @@ def check_migrations_done():
             migration_id: int = cur.fetchall()[0][0]
             assert migration_id >= num_migrations

-        wait_until(20, 0.5, check_migrations_done)
+        wait_until(check_migrations_done)

     # Mock the extension part of spec passed from control plane for local testing
     # endpoint.rs adds content of this file as a part of the spec.json
@@ -4489,12 +4498,10 @@ def are_lsns_advanced():
             )
             assert stat.remote_consistent_lsn >= lsn and stat.backup_lsn >= lsn.segment_lsn()

-        # xxx: max wait is long because we might be waiting for reconnection from
-        # pageserver to this safekeeper
-        wait_until(30, 1, are_lsns_advanced)
+        wait_until(are_lsns_advanced)
         client.checkpoint(tenant_id, timeline_id)
         if wait_wal_removal:
-            wait_until(30, 1, are_segments_removed)
+            wait_until(are_segments_removed)

     def wait_until_paused(self, failpoint: str):
         msg = f"at failpoint {failpoint}"
@@ -4503,7 +4510,7 @@ def paused():
             log.info(f"waiting for hitting failpoint {failpoint}")
             self.assert_log_contains(msg)

-        wait_until(20, 0.5, paused)
+        wait_until(paused)


 class NeonBroker(LogUtils):
diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py
index 4cf3ece39634..0832eac22f2d 100644
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -850,6 +850,7 @@ def timeline_checkpoint(
         force_repartition=False,
         force_image_layer_creation=False,
         force_l0_compaction=False,
+        wait_until_flushed=True,
         wait_until_uploaded=False,
         compact: bool | None = None,
         **kwargs,
@@ -862,6 +863,8 @@ def timeline_checkpoint(
             query["force_image_layer_creation"] = "true"
         if force_l0_compaction:
             query["force_l0_compaction"] = "true"
+        if not wait_until_flushed:
+            query["wait_until_flushed"] = "false"
         if wait_until_uploaded:
             query["wait_until_uploaded"] = "true"

@@ -869,7 +872,7 @@ def timeline_checkpoint(
             query["compact"] = "true" if compact else "false"

         log.info(
-            f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id},
wait_until_uploaded={wait_until_uploaded}" + f"Requesting checkpoint: tenant={tenant_id} timeline={timeline_id} wait_until_flushed={wait_until_flushed} wait_until_uploaded={wait_until_uploaded} compact={compact}" ) res = self.put( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint", diff --git a/test_runner/fixtures/pageserver/utils.py b/test_runner/fixtures/pageserver/utils.py index 46700e3fe377..66f61f9b4c37 100644 --- a/test_runner/fixtures/pageserver/utils.py +++ b/test_runner/fixtures/pageserver/utils.py @@ -13,7 +13,7 @@ from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineId from fixtures.log_helper import log from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient -from fixtures.remote_storage import RemoteStorage, RemoteStorageKind, S3Storage +from fixtures.remote_storage import RemoteStorage, S3Storage from fixtures.utils import wait_until if TYPE_CHECKING: @@ -54,23 +54,15 @@ def wait_for_upload( tenant: TenantId | TenantShardId, timeline: TimelineId, lsn: Lsn, + timeout=20, ): - """waits for local timeline upload up to specified lsn""" + """Waits for local timeline upload up to specified LSN""" - current_lsn = Lsn(0) - for i in range(20): - current_lsn = remote_consistent_lsn(pageserver_http, tenant, timeline) - if current_lsn >= lsn: - log.info("wait finished") - return - lr_lsn = last_record_lsn(pageserver_http, tenant, timeline) - log.info( - f"waiting for remote_consistent_lsn to reach {lsn}, now {current_lsn}, last_record_lsn={lr_lsn}, iteration {i + 1}" - ) - time.sleep(1) - raise Exception( - f"timed out while waiting for {tenant}/{timeline} remote_consistent_lsn to reach {lsn}, was {current_lsn}" - ) + def is_uploaded(): + remote_lsn = remote_consistent_lsn(pageserver_http, tenant, timeline) + assert remote_lsn >= lsn, f"remote_consistent_lsn at {remote_lsn}" + + wait_until(is_uploaded, name=f"upload to {lsn}", timeout=timeout) def _tenant_in_expected_state(tenant_info: dict[str, Any], expected_state: str): @@ -269,12 +261,7 @@ def wait_timeline_detail_404( pageserver_http: PageserverHttpClient, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, - iterations: int, - interval: float | None = None, ): - if interval is None: - interval = 0.25 - def timeline_is_missing(): data = {} try: @@ -287,19 +274,17 @@ def timeline_is_missing(): raise RuntimeError(f"Timeline exists state {data.get('state')}") - wait_until(iterations, interval, func=timeline_is_missing) + wait_until(timeline_is_missing) def timeline_delete_wait_completed( pageserver_http: PageserverHttpClient, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, - iterations: int = 20, - interval: float | None = None, **delete_args, ) -> None: pageserver_http.timeline_delete(tenant_id=tenant_id, timeline_id=timeline_id, **delete_args) - wait_timeline_detail_404(pageserver_http, tenant_id, timeline_id, iterations, interval) + wait_timeline_detail_404(pageserver_http, tenant_id, timeline_id) # remote_storage must not be None, but that's easier for callers to make mypy happy @@ -453,7 +438,3 @@ def many_small_layers_tenant_config() -> dict[str, Any]: "checkpoint_distance": 1024**2, "image_creation_threshold": 100, } - - -def poll_for_remote_storage_iterations(remote_storage_kind: RemoteStorageKind) -> int: - return 40 if remote_storage_kind is RemoteStorageKind.REAL_S3 else 15 diff --git a/test_runner/fixtures/safekeeper/http.py b/test_runner/fixtures/safekeeper/http.py index 094188c0b5f5..286f80ba69f1 100644 --- 
a/test_runner/fixtures/safekeeper/http.py +++ b/test_runner/fixtures/safekeeper/http.py @@ -175,7 +175,7 @@ def timeline_start_lsn_non_zero() -> Lsn: assert s > Lsn(0) return s - return wait_until(30, 1, timeline_start_lsn_non_zero) + return wait_until(timeline_start_lsn_non_zero) def get_commit_lsn(self, tenant_id: TenantId, timeline_id: TimelineId) -> Lsn: return self.timeline_status(tenant_id, timeline_id).commit_lsn diff --git a/test_runner/fixtures/safekeeper/utils.py b/test_runner/fixtures/safekeeper/utils.py index 024691647033..922cdedccc21 100644 --- a/test_runner/fixtures/safekeeper/utils.py +++ b/test_runner/fixtures/safekeeper/utils.py @@ -19,4 +19,4 @@ def walreceivers_absent(): log.info(f"waiting for walreceivers to be gone, currently {status.walreceivers}") assert len(status.walreceivers) == 0 - wait_until(30, 0.5, walreceivers_absent) + wait_until(walreceivers_absent) diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index 04e98fe494db..c34ac298d1cc 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -9,6 +9,7 @@ import threading import time from collections.abc import Callable, Iterable +from datetime import datetime, timedelta from hashlib import sha256 from pathlib import Path from typing import TYPE_CHECKING, Any, TypeVar @@ -380,15 +381,10 @@ def start_in_background( if return_code is not None: error = f"expected subprocess to run but it exited with code {return_code}" else: - attempts = 10 try: - wait_until( - number_of_iterations=attempts, - interval=1, - func=is_started, - ) + wait_until(is_started, timeout=10) except Exception: - error = f"Failed to get correct status from subprocess in {attempts} attempts" + error = "Failed to get correct status from subprocess" except Exception as e: error = f"expected subprocess to start but it failed with exception: {e}" @@ -402,28 +398,31 @@ def start_in_background( def wait_until( - number_of_iterations: int, - interval: float, func: Callable[[], WaitUntilRet], - show_intermediate_error: bool = False, + name: str | None = None, + timeout: float = 20.0, # seconds + interval: float = 0.5, # seconds + status_interval: float = 1.0, # seconds ) -> WaitUntilRet: """ Wait until 'func' returns successfully, without exception. Returns the last return value from the function. 
""" + if name is None: + name = getattr(func, "__name__", repr(func)) + deadline = datetime.now() + timedelta(seconds=timeout) + next_status = datetime.now() last_exception = None - for i in range(number_of_iterations): + while datetime.now() <= deadline: try: - res = func() + return func() except Exception as e: - log.info("waiting for %s iteration %s failed: %s", func, i + 1, e) + if datetime.now() >= next_status: + log.info("waiting for %s: %s", name, e) + next_status = datetime.now() + timedelta(seconds=status_interval) last_exception = e - if show_intermediate_error: - log.info(e) time.sleep(interval) - continue - return res - raise Exception(f"timed out while waiting for {func}") from last_exception + raise Exception(f"timed out while waiting for {name}") from last_exception def assert_eq(a, b) -> None: diff --git a/test_runner/logical_repl/test_clickhouse.py b/test_runner/logical_repl/test_clickhouse.py index 8e03bbe5d4f1..6b522fa46d22 100644 --- a/test_runner/logical_repl/test_clickhouse.py +++ b/test_runner/logical_repl/test_clickhouse.py @@ -60,24 +60,22 @@ def test_clickhouse(remote_pg: RemotePostgres): "SETTINGS materialized_postgresql_tables_list = 'table1';" ) wait_until( - 120, - 0.5, lambda: query_clickhouse( client, "select * from db1_postgres.table1 order by 1", "ee600d8f7cd05bd0b169fa81f44300a9dd10085a", ), + timeout=60, ) cur.execute("INSERT INTO table1 (id, column1) VALUES (3, 'ghi'), (4, 'jkl');") conn.commit() wait_until( - 120, - 0.5, lambda: query_clickhouse( client, "select * from db1_postgres.table1 order by 1", "9eba2daaf7e4d7d27ac849525f68b562ab53947d", ), + timeout=60, ) log.debug("Sleeping before final checking if Neon is still alive") time.sleep(3) diff --git a/test_runner/logical_repl/test_debezium.py b/test_runner/logical_repl/test_debezium.py index d2cb087c92f5..8023d64d3d73 100644 --- a/test_runner/logical_repl/test_debezium.py +++ b/test_runner/logical_repl/test_debezium.py @@ -148,14 +148,12 @@ def test_debezium(debezium): ) conn.commit() wait_until( - 100, - 0.5, lambda: get_kafka_msg( consumer, ts_ms, after={"first_name": "John", "last_name": "Dow", "email": "johndow@example.com"}, ), - show_intermediate_error=True, + timeout=60, ) ts_ms = time.time() * 1000 log.info("Insert 2 ts_ms: %s", ts_ms) @@ -165,28 +163,24 @@ def test_debezium(debezium): ) conn.commit() wait_until( - 100, - 0.5, lambda: get_kafka_msg( consumer, ts_ms, after={"first_name": "Alex", "last_name": "Row", "email": "alexrow@example.com"}, ), - show_intermediate_error=True, + timeout=60, ) ts_ms = time.time() * 1000 log.info("Update ts_ms: %s", ts_ms) cur.execute("update inventory.customers set first_name = 'Alexander' where id = 2") conn.commit() wait_until( - 100, - 0.5, lambda: get_kafka_msg( consumer, ts_ms, after={"first_name": "Alexander"}, ), - show_intermediate_error=True, + timeout=60, ) time.sleep(3) cur.execute("select 1") diff --git a/test_runner/performance/pageserver/test_page_service_batching.py b/test_runner/performance/pageserver/test_page_service_batching.py index c47a849fec7c..2c27368001b3 100644 --- a/test_runner/performance/pageserver/test_page_service_batching.py +++ b/test_runner/performance/pageserver/test_page_service_batching.py @@ -116,21 +116,18 @@ def test_throughput( # name is not a metric, we just use it to identify the test easily in the `test_...[...]`` notation } ) - params.update( - { - f"pipelining_config.{k}": (v, {}) - for k, v in dataclasses.asdict(pipelining_config).items() - } - ) + # For storing configuration as a metric, insert a fake 0 with 
the actual configuration attached as labels
+    params.update({"pipelining_config": (0, {"labels": dataclasses.asdict(pipelining_config)})})

     log.info("params: %s", params)

     for param, (value, kwargs) in params.items():
         zenbenchmark.record(
             param,
-            metric_value=value,
+            metric_value=float(value),
             unit=kwargs.pop("unit", ""),
             report=MetricReport.TEST_PARAM,
+            labels=kwargs.pop("labels", None),
             **kwargs,
         )
@@ -167,18 +164,18 @@ def test_throughput(
 @dataclass
 class Metrics:
     time: float
-    pageserver_getpage_count: float
-    pageserver_vectored_get_count: float
+    pageserver_batch_size_histo_sum: float
+    pageserver_batch_size_histo_count: float
     compute_getpage_count: float
     pageserver_cpu_seconds_total: float

     def __sub__(self, other: "Metrics") -> "Metrics":
         return Metrics(
             time=self.time - other.time,
-            pageserver_getpage_count=self.pageserver_getpage_count
-            - other.pageserver_getpage_count,
-            pageserver_vectored_get_count=self.pageserver_vectored_get_count
-            - other.pageserver_vectored_get_count,
+            pageserver_batch_size_histo_sum=self.pageserver_batch_size_histo_sum
+            - other.pageserver_batch_size_histo_sum,
+            pageserver_batch_size_histo_count=self.pageserver_batch_size_histo_count
+            - other.pageserver_batch_size_histo_count,
             compute_getpage_count=self.compute_getpage_count - other.compute_getpage_count,
             pageserver_cpu_seconds_total=self.pageserver_cpu_seconds_total
             - other.pageserver_cpu_seconds_total,
@@ -187,8 +184,8 @@ def __sub__(self, other: "Metrics") -> "Metrics":
     def normalize(self, by) -> "Metrics":
         return Metrics(
             time=self.time / by,
-            pageserver_getpage_count=self.pageserver_getpage_count / by,
-            pageserver_vectored_get_count=self.pageserver_vectored_get_count / by,
+            pageserver_batch_size_histo_sum=self.pageserver_batch_size_histo_sum / by,
+            pageserver_batch_size_histo_count=self.pageserver_batch_size_histo_count / by,
             compute_getpage_count=self.compute_getpage_count / by,
             pageserver_cpu_seconds_total=self.pageserver_cpu_seconds_total / by,
         )
@@ -202,11 +199,11 @@ def get_metrics() -> Metrics:
         pageserver_metrics = ps_http.get_metrics()
         return Metrics(
             time=time.time(),
-            pageserver_getpage_count=pageserver_metrics.query_one(
-                "pageserver_smgr_query_seconds_count", {"smgr_query_type": "get_page_at_lsn"}
+            pageserver_batch_size_histo_sum=pageserver_metrics.query_one(
+                "pageserver_page_service_batch_size_sum"
             ).value,
-            pageserver_vectored_get_count=pageserver_metrics.query_one(
-                "pageserver_get_vectored_seconds_count", {"task_kind": "PageRequestHandler"}
+            pageserver_batch_size_histo_count=pageserver_metrics.query_one(
+                "pageserver_page_service_batch_size_count"
             ).value,
             compute_getpage_count=compute_getpage_count,
             pageserver_cpu_seconds_total=pageserver_metrics.query_one(
@@ -243,7 +240,7 @@ def workload() -> Metrics:
     # Sanity-checks on the collected data
     #
     # assert that getpage counts roughly match between compute and ps
-    assert metrics.pageserver_getpage_count == pytest.approx(
+    assert metrics.pageserver_batch_size_histo_sum == pytest.approx(
         metrics.compute_getpage_count, rel=0.01
     )
@@ -256,7 +253,7 @@ def workload() -> Metrics:
     zenbenchmark.record(
         "perfmetric.batching_factor",
-        metrics.pageserver_getpage_count / metrics.pageserver_vectored_get_count,
+        metrics.pageserver_batch_size_histo_sum / metrics.pageserver_batch_size_histo_count,
         unit="",
         report=MetricReport.HIGHER_IS_BETTER,
     )
diff --git a/test_runner/performance/test_branch_creation.py b/test_runner/performance/test_branch_creation.py
index c50c4ad4324e..cf2212d447f2 100644
--- a/test_runner/performance/test_branch_creation.py
+++
b/test_runner/performance/test_branch_creation.py @@ -137,15 +137,14 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int, shape: startup_line = "INFO version: git(-env)?:" # find the first line of the log file so we can find the next start later - _, first_start = wait_until(5, 1, lambda: env.pageserver.assert_log_contains(startup_line)) + _, first_start = wait_until(lambda: env.pageserver.assert_log_contains(startup_line)) # start without gc so we can time compaction with less noise; use shorter # period for compaction so it starts earlier def patch_default_tenant_config(config): - tenant_config = config.get("tenant_config", {}) + tenant_config = config.setdefault("tenant_config", {}) tenant_config["compaction_period"] = "3s" tenant_config["gc_period"] = "0s" - config["tenant_config"] = tenant_config env.pageserver.edit_config_toml(patch_default_tenant_config) env.pageserver.start( @@ -156,7 +155,7 @@ def patch_default_tenant_config(config): ) _, second_start = wait_until( - 5, 1, lambda: env.pageserver.assert_log_contains(startup_line, first_start) + lambda: env.pageserver.assert_log_contains(startup_line, first_start), ) env.pageserver.quiesce_tenants() @@ -164,8 +163,6 @@ def patch_default_tenant_config(config): # wait for compaction to complete, which most likely has already done so multiple times msg, _ = wait_until( - 30, - 1, lambda: env.pageserver.assert_log_contains( f".*tenant_id={env.initial_tenant}.*: compaction iteration complete.*", second_start ), @@ -205,7 +202,7 @@ def metrics_are_filled() -> list[Sample]: assert len(matching) == len(expected_labels) return matching - samples = wait_until(10, 1, metrics_are_filled) + samples = wait_until(metrics_are_filled) for sample in samples: phase = sample.labels["phase"] diff --git a/test_runner/performance/test_ingest_insert_bulk.py b/test_runner/performance/test_ingest_insert_bulk.py new file mode 100644 index 000000000000..283bcada31bd --- /dev/null +++ b/test_runner/performance/test_ingest_insert_bulk.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +import random +from concurrent.futures import ThreadPoolExecutor + +import pytest +from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from fixtures.common_types import Lsn +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + NeonEnvBuilder, + wait_for_last_flush_lsn, +) +from fixtures.pageserver.utils import ( + wait_for_last_record_lsn, + wait_for_upload, + wait_for_upload_queue_empty, +) +from fixtures.remote_storage import s3_storage + + +@pytest.mark.timeout(900) +@pytest.mark.parametrize("size", [8, 1024, 8192]) +@pytest.mark.parametrize("s3", [True, False], ids=["s3", "local"]) +@pytest.mark.parametrize("backpressure", [True, False], ids=["backpressure", "nobackpressure"]) +@pytest.mark.parametrize("fsync", [True, False], ids=["fsync", "nofsync"]) +def test_ingest_insert_bulk( + request: pytest.FixtureRequest, + neon_env_builder: NeonEnvBuilder, + zenbenchmark: NeonBenchmarker, + fsync: bool, + backpressure: bool, + s3: bool, + size: int, +): + """ + Benchmarks ingestion of 5 GB of sequential insert WAL. Measures ingestion and S3 upload + separately. Also does a Safekeeper→Pageserver re-ingestion to measure Pageserver ingestion in + isolation. 
+ """ + + CONCURRENCY = 1 # 1 is optimal without fsync or backpressure + VOLUME = 5 * 1024**3 + rows = VOLUME // (size + 64) # +64 roughly accounts for per-row WAL overhead + + neon_env_builder.safekeepers_enable_fsync = fsync + + if s3: + neon_env_builder.enable_pageserver_remote_storage(s3_storage()) + # NB: don't use S3 for Safekeeper. It doesn't affect throughput (no backpressure), but it + # would compete with Pageserver for bandwidth. + # neon_env_builder.enable_safekeeper_remote_storage(s3_storage()) + + neon_env_builder.disable_scrub_on_exit() # immediate shutdown may leave stray layers + env = neon_env_builder.init_start() + + endpoint = env.endpoints.create_start( + "main", + config_lines=[ + f"fsync = {fsync}", + "max_replication_apply_lag = 0", + f"max_replication_flush_lag = {'10GB' if backpressure else '0'}", + # NB: neon_local defaults to 15MB, which is too slow -- production uses 500MB. + f"max_replication_write_lag = {'500MB' if backpressure else '0'}", + ], + ) + endpoint.safe_psql("create extension neon") + + # Wait for the timeline to be propagated to the pageserver. + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, env.initial_timeline) + + # Ingest rows. + log.info("Ingesting data") + start_lsn = Lsn(endpoint.safe_psql("select pg_current_wal_lsn()")[0][0]) + + def insert_rows(endpoint, table, count, value): + with endpoint.connect().cursor() as cur: + cur.execute("set statement_timeout = 0") + cur.execute(f"create table {table} (id int, data bytea)") + cur.execute(f"insert into {table} values (generate_series(1, {count}), %s)", (value,)) + + with zenbenchmark.record_duration("upload"): + with zenbenchmark.record_duration("ingest"): + with ThreadPoolExecutor(max_workers=CONCURRENCY) as pool: + for i in range(CONCURRENCY): + # Write a random value for all rows. This is sufficient to prevent compression, + # e.g. in TOAST. Randomly generating every row is too slow. + value = random.randbytes(size) + worker_rows = rows / CONCURRENCY + pool.submit(insert_rows, endpoint, f"table{i}", worker_rows, value) + + end_lsn = Lsn(endpoint.safe_psql("select pg_current_wal_lsn()")[0][0]) + + # Wait for pageserver to ingest the WAL. + client = env.pageserver.http_client() + wait_for_last_record_lsn(client, env.initial_tenant, env.initial_timeline, end_lsn) + + # Wait for pageserver S3 upload. Checkpoint to flush the last in-memory layer. + client.timeline_checkpoint( + env.initial_tenant, + env.initial_timeline, + compact=False, + wait_until_flushed=False, + ) + wait_for_upload(client, env.initial_tenant, env.initial_timeline, end_lsn, timeout=600) + + # Empty out upload queue for next benchmark. + wait_for_upload_queue_empty(client, env.initial_tenant, env.initial_timeline) + + backpressure_time = endpoint.safe_psql("select backpressure_throttling_time()")[0][0] + + # Now that all data is ingested, delete and recreate the tenant in the pageserver. This will + # reingest all the WAL directly from the safekeeper. This gives us a baseline of how fast the + # pageserver can ingest this WAL in isolation. 
+ status = env.storage_controller.inspect(tenant_shard_id=env.initial_tenant) + assert status is not None + + endpoint.stop() # avoid spurious getpage errors + client.tenant_delete(env.initial_tenant) + env.pageserver.tenant_create(tenant_id=env.initial_tenant, generation=status[0]) + + with zenbenchmark.record_duration("recover"): + log.info("Recovering WAL into pageserver") + client.timeline_create(env.pg_version, env.initial_tenant, env.initial_timeline) + wait_for_last_record_lsn(client, env.initial_tenant, env.initial_timeline, end_lsn) + + # Emit metrics. + wal_written_mb = round((end_lsn - start_lsn) / (1024 * 1024)) + zenbenchmark.record("wal_written", wal_written_mb, "MB", MetricReport.TEST_PARAM) + zenbenchmark.record("row_count", rows, "rows", MetricReport.TEST_PARAM) + zenbenchmark.record("concurrency", CONCURRENCY, "clients", MetricReport.TEST_PARAM) + zenbenchmark.record( + "backpressure_time", backpressure_time // 1000, "ms", MetricReport.LOWER_IS_BETTER + ) + + props = {p["name"]: p["value"] for _, p in request.node.user_properties} + for name in ("ingest", "upload", "recover"): + throughput = int(wal_written_mb / props[name]) + zenbenchmark.record(f"{name}_throughput", throughput, "MB/s", MetricReport.HIGHER_IS_BETTER) + + # Pageserver shutdown will likely get stuck on the upload queue, just shut it down immediately. + env.stop(immediate=True) diff --git a/test_runner/performance/test_perf_ingest_using_pgcopydb.py b/test_runner/performance/test_perf_ingest_using_pgcopydb.py index 37f2e9db5026..f0a0c1f5a251 100644 --- a/test_runner/performance/test_perf_ingest_using_pgcopydb.py +++ b/test_runner/performance/test_perf_ingest_using_pgcopydb.py @@ -60,13 +60,13 @@ def build_pgcopydb_command(pgcopydb_filter_file: Path, test_output_dir: Path): "--no-acl", "--skip-db-properties", "--table-jobs", - "8", + "4", "--index-jobs", - "8", + "4", "--restore-jobs", - "8", + "4", "--split-tables-larger-than", - "5GB", + "10GB", "--skip-extensions", "--use-copy-binary", "--filters", @@ -136,7 +136,7 @@ def run_command_and_log_output(command, log_file_path: Path): "LD_LIBRARY_PATH": f"{os.getenv('PGCOPYDB_LIB_PATH')}:{os.getenv('PG_16_LIB_PATH')}", "PGCOPYDB_SOURCE_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_SOURCE_CONNSTR")), "PGCOPYDB_TARGET_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")), - "PGOPTIONS": "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=16", + "PGOPTIONS": "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7", } # Combine the current environment with custom variables env = os.environ.copy() @@ -184,7 +184,7 @@ def parse_log_and_report_metrics( for metric_name, pattern in metric_patterns.items(): if pattern.search(line): # Extract duration and convert it to seconds - duration_match = re.search(r"\d+h\d+m|\d+s|\d+ms|\d+\.\d+s", line) + duration_match = re.search(r"\d+h\d+m|\d+m\d+s|\d+s|\d+ms|\d+\.\d+s", line) if duration_match: duration_str = duration_match.group(0) parts = re.findall(r"\d+[a-zA-Z]+", duration_str) diff --git a/test_runner/performance/test_sharded_ingest.py b/test_runner/performance/test_sharded_ingest.py index 4c21e799c8e0..94fd54bade6a 100644 --- a/test_runner/performance/test_sharded_ingest.py +++ b/test_runner/performance/test_sharded_ingest.py @@ -90,6 +90,7 @@ def test_sharded_ingest( # Start the endpoint. endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) start_lsn = Lsn(endpoint.safe_psql("select pg_current_wal_lsn()")[0][0]) + # Ingest data and measure WAL volume and duration. 
with closing(endpoint.connect()) as conn: with conn.cursor() as cur: @@ -104,6 +105,8 @@ def test_sharded_ingest( wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) end_lsn = Lsn(endpoint.safe_psql("select pg_current_wal_lsn()")[0][0]) + + # Record metrics. wal_written_mb = round((end_lsn - start_lsn) / (1024 * 1024)) zenbenchmark.record("wal_written", wal_written_mb, "MB", MetricReport.TEST_PARAM) @@ -152,3 +155,7 @@ def test_sharded_ingest( log.info(f"WAL ingested by each pageserver {ingested_by_ps}") assert tenant_get_shards(env, tenant_id) == shards, "shards moved" + + # The pageservers can take a long time to shut down gracefully, presumably due to the upload + # queue or compactions or something. Just stop them immediately, we don't care. + env.stop(immediate=True) diff --git a/test_runner/performance/test_storage_controller_scale.py b/test_runner/performance/test_storage_controller_scale.py index 142bd3d669ae..49f41483ec7d 100644 --- a/test_runner/performance/test_storage_controller_scale.py +++ b/test_runner/performance/test_storage_controller_scale.py @@ -72,7 +72,7 @@ def test_storage_controller_many_tenants( we don't fall over for a thousand shards. """ - neon_env_builder.num_pageservers = 5 + neon_env_builder.num_pageservers = 6 neon_env_builder.storage_controller_config = { # Default neon_local uses a small timeout: use a longer one to tolerate longer pageserver restarts. # TODO: tune this down as restarts get faster (https://github.com/neondatabase/neon/pull/7553), to @@ -84,6 +84,11 @@ def test_storage_controller_many_tenants( compute_reconfigure_listener.control_plane_compute_hook_api ) + AZS = ["alpha", "bravo", "charlie"] + neon_env_builder.pageserver_config_override = lambda ps_cfg: ps_cfg.update( + {"availability_zone": f"az-{AZS[ps_cfg['id'] % len(AZS)]}"} + ) + # A small sleep on each call into the notify hook, to simulate the latency of doing a database write compute_reconfigure_listener.register_on_notify(lambda body: time.sleep(0.01)) diff --git a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py index 670c2698f5aa..45112fd67e2a 100644 --- a/test_runner/regress/test_attach_tenant_config.py +++ b/test_runner/regress/test_attach_tenant_config.py @@ -64,8 +64,6 @@ def negative_env(neon_env_builder: NeonEnvBuilder) -> Generator[NegativeTests, N ) wait_until( - 50, - 0.1, lambda: env.pageserver.assert_log_contains(".*Error processing HTTP request: Bad request"), ) diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index 302a8fd0d1b4..881503046ce3 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -15,7 +15,7 @@ from fixtures.utils import skip_in_debug_build, wait_until from fixtures.workload import Workload -AGGRESIVE_COMPACTION_TENANT_CONF = { +AGGRESSIVE_COMPACTION_TENANT_CONF = { # Disable gc and compaction. The test runs compaction manually. 
"gc_period": "0s", "compaction_period": "0s", @@ -24,6 +24,7 @@ # Compact small layers "compaction_target_size": 1024**2, "image_creation_threshold": 2, + # "lsn_lease_length": "0s", -- TODO: would cause branch creation errors, should fix later } @@ -51,7 +52,7 @@ def test_pageserver_compaction_smoke( page_cache_size=10 """ - env = neon_env_builder.init_start(initial_tenant_conf=AGGRESIVE_COMPACTION_TENANT_CONF) + env = neon_env_builder.init_start(initial_tenant_conf=AGGRESSIVE_COMPACTION_TENANT_CONF) tenant_id = env.initial_tenant timeline_id = env.initial_timeline @@ -120,14 +121,28 @@ def test_pageserver_compaction_smoke( assert vectored_average < 8 +@pytest.mark.skip( + "This is being fixed and tracked in https://github.com/neondatabase/neon/issues/9114" +) +@skip_in_debug_build("only run with release build") def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder): - env = neon_env_builder.init_start(initial_tenant_conf=AGGRESIVE_COMPACTION_TENANT_CONF) + SMOKE_CONF = { + # Run both gc and gc-compaction. + "gc_period": "5s", + "compaction_period": "5s", + # No PiTR interval and small GC horizon + "pitr_interval": "0s", + "gc_horizon": f"{1024 ** 2}", + "lsn_lease_length": "0s", + } + + env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF) tenant_id = env.initial_tenant timeline_id = env.initial_timeline - row_count = 1000 - churn_rounds = 10 + row_count = 10000 + churn_rounds = 50 ps_http = env.pageserver.http_client() @@ -141,20 +156,28 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder): if i % 10 == 0: log.info(f"Running churn round {i}/{churn_rounds} ...") - workload.churn_rows(row_count, env.pageserver.id) - # Force L0 compaction to ensure the number of layers is within bounds, so that gc-compaction can run. - ps_http.timeline_compact(tenant_id, timeline_id, force_l0_compaction=True) - assert ps_http.perf_info(tenant_id, timeline_id)[0]["num_of_l0"] <= 1 ps_http.timeline_compact( tenant_id, timeline_id, enhanced_gc_bottom_most_compaction=True, body={ - "start": "000000000000000000000000000000000000", - "end": "030000000000000000000000000000000000", + "scheduled": True, + "sub_compaction": True, + "compact_range": { + "start": "000000000000000000000000000000000000", + # skip the SLRU range for now -- it races with get-lsn-by-timestamp, TODO: fix this + "end": "010000000000000000000000000000000000", + }, }, ) + workload.churn_rows(row_count, env.pageserver.id) + + # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked) + env.pageserver.assert_log_contains( + "scheduled_compact_timeline.*picked .* layers for compaction" + ) + log.info("Validating at workload end ...") workload.validate(env.pageserver.id) @@ -385,7 +408,7 @@ def assert_broken(): # Wait for enough failures to break the circuit breaker # This wait is fairly long because we back off on compaction failures, so 5 retries takes ~30s - wait_until(60, 1, assert_broken) + wait_until(assert_broken, timeout=60) # Sleep for a while, during which time we expect that compaction will _not_ be retried time.sleep(10) diff --git a/test_runner/regress/test_compute_metrics.py b/test_runner/regress/test_compute_metrics.py index 1b15c5f15efa..787790103fa4 100644 --- a/test_runner/regress/test_compute_metrics.py +++ b/test_runner/regress/test_compute_metrics.py @@ -215,7 +215,7 @@ def __init__( # # The "host" network mode allows sql_exporter to talk to the # endpoint which is running on the host. 
- super().__init__("docker.io/burningalchemist/sql_exporter:0.13.1", network_mode="host") + super().__init__("docker.io/burningalchemist/sql_exporter:0.16.0", network_mode="host") self.__logs_dir = logs_dir self.__port = port diff --git a/test_runner/regress/test_disk_usage_eviction.py b/test_runner/regress/test_disk_usage_eviction.py index 18075110082b..954db914b9ea 100644 --- a/test_runner/regress/test_disk_usage_eviction.py +++ b/test_runner/regress/test_disk_usage_eviction.py @@ -62,9 +62,8 @@ def assert_overrides(tenant_id, default_tenant_conf_value): if config_level_override is not None: def set_min_resident_size(config): - tenant_config = config.get("tenant_config", {}) + tenant_config = config.setdefault("tenant_config", {}) tenant_config["min_resident_size_override"] = config_level_override - config["tenant_config"] = tenant_config env.pageserver.edit_config_toml(set_min_resident_size) env.pageserver.stop() @@ -211,7 +210,7 @@ def statvfs_called(): pageserver.assert_log_contains(".*running mocked statvfs.*") # we most likely have already completed multiple runs - wait_until(10, 1, statvfs_called) + wait_until(statvfs_called) def count_layers_per_tenant( @@ -772,14 +771,14 @@ def test_statvfs_pressure_usage(eviction_env: EvictionEnv): ) wait_until( - 10, 1, lambda: env.neon_env.pageserver.assert_log_contains(".*disk usage pressure relieved") + lambda: env.neon_env.pageserver.assert_log_contains(".*disk usage pressure relieved") ) def less_than_max_usage_pct(): post_eviction_total_size, _, _ = env.timelines_du(env.pageserver) assert post_eviction_total_size < 0.33 * total_size, "we requested max 33% usage" - wait_until(2, 2, less_than_max_usage_pct) + wait_until(less_than_max_usage_pct, timeout=5) # Disk usage candidate collection only takes into account active tenants. # However, the statvfs call takes into account the entire tenants directory, @@ -825,7 +824,7 @@ def test_statvfs_pressure_min_avail_bytes(eviction_env: EvictionEnv): ) wait_until( - 10, 1, lambda: env.neon_env.pageserver.assert_log_contains(".*disk usage pressure relieved") + lambda: env.neon_env.pageserver.assert_log_contains(".*disk usage pressure relieved"), ) def more_than_min_avail_bytes_freed(): @@ -834,7 +833,7 @@ def more_than_min_avail_bytes_freed(): total_size - post_eviction_total_size >= min_avail_bytes ), f"we requested at least {min_avail_bytes} worth of free space" - wait_until(2, 2, more_than_min_avail_bytes_freed) + wait_until(more_than_min_avail_bytes_freed, timeout=5) def test_secondary_mode_eviction(eviction_env_ha: EvictionEnv): diff --git a/test_runner/regress/test_hot_standby.py b/test_runner/regress/test_hot_standby.py index 0b1ac11c1653..4044f25b37b8 100644 --- a/test_runner/regress/test_hot_standby.py +++ b/test_runner/regress/test_hot_standby.py @@ -257,7 +257,7 @@ def test_hot_standby_feedback(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): # Wait until we see that the pgbench_accounts is created + filled on replica *and* # index is created. Otherwise index creation would conflict with # read queries and hs feedback won't save us. - wait_until(60, 1.0, partial(pgbench_accounts_initialized, secondary)) + wait_until(partial(pgbench_accounts_initialized, secondary), timeout=60) # Test should fail if hs feedback is disabled anyway, but cross # check that walproposer sets some xmin. 
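These call-site rewrites all follow from the new `wait_until` in `test_runner/fixtures/utils.py`: the predicate comes first, iteration counts are gone, and the budget is a wall-clock `timeout` polled at a fixed `interval`. A self-contained sketch of the new calling convention (the predicate here is a stand-in):

```python
import time

from fixtures.utils import wait_until

started_at = time.monotonic()

def warmed_up():
    # wait_until() retries on any exception, so a bare assert is a valid predicate
    assert time.monotonic() - started_at > 1.0, "still warming up"

# Old style, removed throughout this diff: wait_until(10, 1.0, warmed_up)
wait_until(warmed_up)              # defaults: timeout=20.0s, interval=0.5s
wait_until(warmed_up, timeout=60)  # usually only the total budget needs tuning
```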
@@ -269,7 +269,7 @@ def xmin_is_not_null(): log.info(f"xmin is {slot_xmin}") assert int(slot_xmin) > 0 - wait_until(10, 1.0, xmin_is_not_null) + wait_until(xmin_is_not_null) for _ in range(1, 5): # in debug mode takes about 5-7s balance = secondary.safe_psql_scalar("select sum(abalance) from pgbench_accounts") @@ -286,7 +286,7 @@ def xmin_is_null(): log.info(f"xmin is {slot_xmin}") assert slot_xmin is None - wait_until(10, 1.0, xmin_is_null) + wait_until(xmin_is_null) # Test race condition between WAL replay and backends performing queries diff --git a/test_runner/regress/test_layers_from_future.py b/test_runner/regress/test_layers_from_future.py index 761ec7568f4b..8818b407122b 100644 --- a/test_runner/regress/test_layers_from_future.py +++ b/test_runner/regress/test_layers_from_future.py @@ -206,7 +206,7 @@ def future_layer_is_gone_from_index_part(): future_layers = set(get_future_layers()) assert future_layer not in future_layers - wait_until(10, 0.5, future_layer_is_gone_from_index_part) + wait_until(future_layer_is_gone_from_index_part) # We already make deletion stuck here, but we don't necessarily hit the failpoint # because deletions are batched. diff --git a/test_runner/regress/test_logging.py b/test_runner/regress/test_logging.py index f6fbdcabfd9d..d94c786f4983 100644 --- a/test_runner/regress/test_logging.py +++ b/test_runner/regress/test_logging.py @@ -37,7 +37,7 @@ def assert_logged(): return env.pageserver.assert_log_contains(f".*{msg_id}.*") - wait_until(10, 0.5, assert_logged) + wait_until(assert_logged) # make sure it's counted def assert_metric_value(): @@ -49,4 +49,4 @@ def assert_metric_value(): log.info("libmetrics_tracing_event_count: %s", val) assert val > (before or 0.0) - wait_until(10, 1, assert_metric_value) + wait_until(assert_metric_value) diff --git a/test_runner/regress/test_logical_replication.py b/test_runner/regress/test_logical_replication.py index ba471b7147d7..db18e1758c12 100644 --- a/test_runner/regress/test_logical_replication.py +++ b/test_runner/regress/test_logical_replication.py @@ -207,7 +207,7 @@ def slot_removed(ep: Endpoint): log.info(f"ep connstr is {endpoint.connstr()}, subscriber connstr {vanilla_pg.connstr()}") vanilla_pg.safe_psql(f"create subscription sub1 connection '{connstr}' publication pub1") - wait_until(number_of_iterations=10, interval=2, func=partial(slot_removed, endpoint)) + wait_until(partial(slot_removed, endpoint)) def test_ondemand_wal_download_in_replication_slot_funcs(neon_env_builder: NeonEnvBuilder): @@ -519,7 +519,7 @@ def check_that_changes_propagated(): assert len(res) == 4 assert [r[0] for r in res] == [10, 20, 30, 40] - wait_until(10, 0.5, check_that_changes_propagated) + wait_until(check_that_changes_propagated) def logical_replication_wait_flush_lsn_sync(publisher: PgProtocol) -> Lsn: @@ -549,7 +549,7 @@ def check_caughtup(): ) assert flush_lsn >= publisher_flush_lsn - wait_until(30, 0.5, check_caughtup) + wait_until(check_caughtup) return publisher_flush_lsn diff --git a/test_runner/regress/test_lsn_mapping.py b/test_runner/regress/test_lsn_mapping.py index 7f0b54112820..e42e71646d97 100644 --- a/test_runner/regress/test_lsn_mapping.py +++ b/test_runner/regress/test_lsn_mapping.py @@ -169,7 +169,7 @@ def test_get_lsn_by_timestamp_cancelled(neon_env_builder: NeonEnvBuilder): ) _, offset = wait_until( - 20, 0.5, lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}") + lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}") ) with pytest.raises(ReadTimeout): @@ -178,8 +178,6 @@ 
def test_get_lsn_by_timestamp_cancelled(neon_env_builder: NeonEnvBuilder): client.configure_failpoints((failpoint, "off")) _, offset = wait_until( - 20, - 0.5, lambda: env.pageserver.assert_log_contains( "Cancelled request finished with an error: Cancelled$", offset ), diff --git a/test_runner/regress/test_neon_superuser.py b/test_runner/regress/test_neon_superuser.py index 7118127a1ffe..49cd91906f8f 100644 --- a/test_runner/regress/test_neon_superuser.py +++ b/test_runner/regress/test_neon_superuser.py @@ -77,7 +77,7 @@ def check_that_changes_propagated(): assert len(res) == 4 assert [r[0] for r in res] == [10, 20, 30, 40] - wait_until(10, 0.5, check_that_changes_propagated) + wait_until(check_that_changes_propagated) # Test that pg_monitor is working for neon_superuser role cur.execute("SELECT query from pg_stat_activity LIMIT 1") diff --git a/test_runner/regress/test_ondemand_download.py b/test_runner/regress/test_ondemand_download.py index e1caaeb6c1f6..028d1c2e49b8 100644 --- a/test_runner/regress/test_ondemand_download.py +++ b/test_runner/regress/test_ondemand_download.py @@ -256,7 +256,7 @@ def get_resident_physical_size(): ##### Second start, restore the data and ensure it's the same env.pageserver.start() - wait_until(10, 0.2, lambda: assert_tenant_state(client, tenant_id, "Active")) + wait_until(lambda: assert_tenant_state(client, tenant_id, "Active")) # The current_physical_size reports the sum of layers loaded in the layer # map, regardless of where the layer files are located. So even though we @@ -413,7 +413,7 @@ def get_resident_physical_size(): ] ) - wait_until(10, 0.2, lambda: assert_tenant_state(client, tenant_id, "Active")) + wait_until(lambda: assert_tenant_state(client, tenant_id, "Active")) ###### Phase 1: exercise download error code path @@ -705,7 +705,7 @@ def test_layer_download_cancelled_by_config_location(neon_env_builder: NeonEnvBu ) _, offset = wait_until( - 20, 0.5, lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}") + lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}") ) location_conf = {"mode": "Detached", "tenant_conf": {}} @@ -713,8 +713,6 @@ def test_layer_download_cancelled_by_config_location(neon_env_builder: NeonEnvBu detach = exec.submit(client.tenant_location_conf, env.initial_tenant, location_conf) _, offset = wait_until( - 20, - 0.5, lambda: env.pageserver.assert_log_contains( "closing is taking longer than expected", offset ), @@ -734,8 +732,6 @@ def test_layer_download_cancelled_by_config_location(neon_env_builder: NeonEnvBu client.configure_failpoints((failpoint, "pause")) _, offset = wait_until( - 20, - 0.5, lambda: env.pageserver.assert_log_contains(f"cfg failpoint: {failpoint} pause", offset), ) @@ -750,8 +746,6 @@ def test_layer_download_cancelled_by_config_location(neon_env_builder: NeonEnvBu warmup = exec.submit(client.tenant_secondary_download, env.initial_tenant, wait_ms=30000) _, offset = wait_until( - 20, - 0.5, lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}", offset), ) @@ -805,7 +799,7 @@ def test_layer_download_timeouted(neon_env_builder: NeonEnvBuilder): ) _, offset = wait_until( - 20, 0.5, lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}") + lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}") ) # ensure enough time while paused to trip the timeout time.sleep(2) @@ -824,8 +818,6 @@ def test_layer_download_timeouted(neon_env_builder: NeonEnvBuilder): # capture the next offset for a new synchronization with the failpoint _, 
offset = wait_until( - 20, - 0.5, lambda: env.pageserver.assert_log_contains(f"cfg failpoint: {failpoint} pause", offset), ) diff --git a/test_runner/regress/test_pageserver_api.py b/test_runner/regress/test_pageserver_api.py index 05e81b82e07a..55fd7a8608b4 100644 --- a/test_runner/regress/test_pageserver_api.py +++ b/test_runner/regress/test_pageserver_api.py @@ -117,19 +117,11 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv): # We need to wait here because it's possible that we don't have access to # the latest WAL yet, when the `timeline_detail` API is first called. # See: https://github.com/neondatabase/neon/issues/1768. - lsn = wait_until( - number_of_iterations=5, - interval=1, - func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, None), - ) + lsn = wait_until(lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, None)) # Make a DB modification then expect getting a new WAL receiver's data. endpoint.safe_psql("INSERT INTO t VALUES (1, 'hey')") - wait_until( - number_of_iterations=5, - interval=1, - func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, lsn), - ) + wait_until(lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, lsn)) def test_pageserver_http_api_client(neon_simple_env: NeonEnv): diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py index 6ba5753420c7..7e5bb45242ff 100644 --- a/test_runner/regress/test_pageserver_generations.py +++ b/test_runner/regress/test_pageserver_generations.py @@ -352,7 +352,7 @@ def test_deletion_queue_recovery( def assert_some_validations(): assert get_deletion_queue_validated(ps_http) > 0 - wait_until(20, 1, assert_some_validations) + wait_until(assert_some_validations) # The validatated keys statistic advances before the header is written, so we # also wait to see the header hit the disk: this seems paranoid but the race @@ -360,7 +360,7 @@ def assert_some_validations(): def assert_header_written(): assert (main_pageserver.workdir / "deletion" / "header-01").exists() - wait_until(20, 1, assert_header_written) + wait_until(assert_header_written) # If we will lose attachment, then our expectation on restart is that only the ones # we already validated will execute. Act like only those were present in the queue. @@ -382,11 +382,11 @@ def assert_deletions_submitted(n: int) -> None: # After restart, issue a flush to kick the deletion frontend to do recovery. # It should recover all the operations we submitted before the restart. 
ps_http.deletion_queue_flush(execute=False) - wait_until(20, 0.25, lambda: assert_deletions_submitted(before_restart_depth)) + wait_until(lambda: assert_deletions_submitted(before_restart_depth)) # The queue should drain through completely if we flush it ps_http.deletion_queue_flush(execute=True) - wait_until(10, 1, lambda: assert_deletion_queue(ps_http, lambda n: n == 0)) + wait_until(lambda: assert_deletion_queue(ps_http, lambda n: n == 0)) if keep_attachment == KeepAttachment.KEEP: # - If we kept the attachment, then our pre-restart deletions should execute @@ -564,7 +564,7 @@ def test_multi_attach( ) # Initially, the tenant will be attached to the first pageserver (first is default in our test harness) - wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[0], tenant_id, "Active")) + wait_until(lambda: assert_tenant_state(http_clients[0], tenant_id, "Active")) _detail = http_clients[0].timeline_detail(tenant_id, timeline_id) with pytest.raises(PageserverApiException): http_clients[1].timeline_detail(tenant_id, timeline_id) @@ -579,8 +579,8 @@ def test_multi_attach( pageservers[1].tenant_attach(env.initial_tenant) pageservers[2].tenant_attach(env.initial_tenant) - wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[1], tenant_id, "Active")) - wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[2], tenant_id, "Active")) + wait_until(lambda: assert_tenant_state(http_clients[1], tenant_id, "Active")) + wait_until(lambda: assert_tenant_state(http_clients[2], tenant_id, "Active")) # Now they all have it attached _details = list([c.timeline_detail(tenant_id, timeline_id) for c in http_clients]) diff --git a/test_runner/regress/test_pageserver_getpage_throttle.py b/test_runner/regress/test_pageserver_getpage_throttle.py index f1aad85fe98a..9644ebe3e2be 100644 --- a/test_runner/regress/test_pageserver_getpage_throttle.py +++ b/test_runner/regress/test_pageserver_getpage_throttle.py @@ -4,6 +4,7 @@ import json import uuid +import pytest from anyio import Path from fixtures.common_types import TenantId, TimelineId from fixtures.log_helper import log @@ -32,7 +33,9 @@ def test_pageserver_getpage_throttle(neon_env_builder: NeonEnvBuilder, pg_bin: P conf={ "compaction_period": f"{compaction_period}s", "timeline_get_throttle": { - "task_kinds": ["PageRequestHandler"], + "task_kinds": [ + "PageRequestHandler" + ], # any non-empty array will do here https://github.com/neondatabase/neon/pull/9962 "initial": 0, "refill_interval": "100ms", "refill_amount": int(rate_limit_rps / 10), @@ -70,20 +73,25 @@ def run_pagebench_at_max_speed_and_get_total_requests_completed(duration_secs: i log.info("warmup / make sure metrics are present") run_pagebench_at_max_speed_and_get_total_requests_completed(2) - metrics_query = { + smgr_metrics_query = { "tenant_id": str(tenant_id), "timeline_id": str(timeline_id), "smgr_query_type": "get_page_at_lsn", } - metric_name = "pageserver_smgr_query_seconds_sum" - smgr_query_seconds_pre = ps_http.get_metric_value(metric_name, metrics_query) + smgr_metric_name = "pageserver_smgr_query_seconds_sum" + throttle_metrics_query = { + "tenant_id": str(tenant_id), + } + throttle_metric_name = "pageserver_tenant_throttling_wait_usecs_sum_total" + + smgr_query_seconds_pre = ps_http.get_metric_value(smgr_metric_name, smgr_metrics_query) assert smgr_query_seconds_pre is not None + throttled_usecs_pre = ps_http.get_metric_value(throttle_metric_name, throttle_metrics_query) + assert throttled_usecs_pre is not None marker = uuid.uuid4().hex 
ps_http.post_tracing_event("info", marker) - _, marker_offset = wait_until( - 10, 0.5, lambda: env.pageserver.assert_log_contains(marker, offset=None) - ) + _, marker_offset = wait_until(lambda: env.pageserver.assert_log_contains(marker, offset=None)) log.info("run pagebench") duration_secs = 10 @@ -103,23 +111,31 @@ def run_pagebench_at_max_speed_and_get_total_requests_completed(duration_secs: i log.info("validate that we logged the throttling") wait_until( - 10, - compaction_period / 10, lambda: env.pageserver.assert_log_contains( f".*{tenant_id}.*shard was throttled in the last n_seconds.*", offset=marker_offset, ), + timeout=compaction_period, ) - log.info("validate that the metric doesn't include throttle wait time") - smgr_query_seconds_post = ps_http.get_metric_value(metric_name, metrics_query) + smgr_query_seconds_post = ps_http.get_metric_value(smgr_metric_name, smgr_metrics_query) assert smgr_query_seconds_post is not None + throttled_usecs_post = ps_http.get_metric_value(throttle_metric_name, throttle_metrics_query) + assert throttled_usecs_post is not None actual_smgr_query_seconds = smgr_query_seconds_post - smgr_query_seconds_pre + actual_throttled_usecs = throttled_usecs_post - throttled_usecs_pre + actual_throttled_secs = actual_throttled_usecs / 1_000_000 + log.info("validate that the metric doesn't include throttle wait time") assert ( duration_secs >= 10 * actual_smgr_query_seconds ), "smgr metrics should not include throttle wait time" + log.info("validate that the throttling wait time metrics is correct") + assert ( + pytest.approx(actual_throttled_secs + actual_smgr_query_seconds, 0.1) == duration_secs + ), "most of the time in this test is spent throttled because the rate-limit's contribution to latency dominates" + throttle_config_with_field_fair_set = { "task_kinds": ["PageRequestHandler"], @@ -167,7 +183,8 @@ def test_throttle_fair_config_is_settable_but_ignored_in_config_toml( """ def set_tenant_config(ps_cfg): - ps_cfg["tenant_config"] = {"timeline_get_throttle": throttle_config_with_field_fair_set} + tenant_config = ps_cfg.setdefault("tenant_config", {}) + tenant_config["timeline_get_throttle"] = throttle_config_with_field_fair_set neon_env_builder.pageserver_config_override = set_tenant_config env = neon_env_builder.init_start() diff --git a/test_runner/regress/test_pageserver_layer_rolling.py b/test_runner/regress/test_pageserver_layer_rolling.py index f6a7bfa1ade5..706da1e35e00 100644 --- a/test_runner/regress/test_pageserver_layer_rolling.py +++ b/test_runner/regress/test_pageserver_layer_rolling.py @@ -84,7 +84,7 @@ def query(): # The metric gets initialised on the first update. # Retry a few times, but return 0 if it's stable. try: - return float(wait_until(3, 0.5, query)) + return float(wait_until(query, timeout=2, interval=0.5)) except Exception: return 0 @@ -131,7 +131,7 @@ def test_pageserver_small_inmemory_layers( wait_until_pageserver_is_caught_up(env, last_flush_lsns) # We didn't write enough data to trigger a size-based checkpoint: we should see dirty data. 
- wait_until(10, 1, lambda: assert_dirty_bytes_nonzero(env)) + wait_until(lambda: assert_dirty_bytes_nonzero(env)) ps_http_client = env.pageserver.http_client() total_wal_ingested_before_restart = wait_for_wal_ingest_metric(ps_http_client) @@ -139,7 +139,7 @@ def test_pageserver_small_inmemory_layers( # Within ~ the checkpoint interval, all the ephemeral layers should be frozen and flushed, # such that there are zero bytes of ephemeral layer left on the pageserver log.info("Waiting for background checkpoints...") - wait_until(CHECKPOINT_TIMEOUT_SECONDS * 2, 1, lambda: assert_dirty_bytes(env, 0)) + wait_until(lambda: assert_dirty_bytes(env, 0), timeout=2 * CHECKPOINT_TIMEOUT_SECONDS) # Zero ephemeral layer bytes does not imply that all the frozen layers were uploaded: they # must be uploaded to remain visible to the pageserver after restart. @@ -180,7 +180,7 @@ def test_idle_checkpoints(neon_env_builder: NeonEnvBuilder): wait_until_pageserver_is_caught_up(env, last_flush_lsns) # We didn't write enough data to trigger a size-based checkpoint: we should see dirty data. - wait_until(10, 1, lambda: assert_dirty_bytes_nonzero(env)) + wait_until(lambda: assert_dirty_bytes_nonzero(env)) # Stop the safekeepers, so that we cannot have any more WAL receiver connections for sk in env.safekeepers: @@ -193,7 +193,7 @@ def test_idle_checkpoints(neon_env_builder: NeonEnvBuilder): # Within ~ the checkpoint interval, all the ephemeral layers should be frozen and flushed, # such that there are zero bytes of ephemeral layer left on the pageserver log.info("Waiting for background checkpoints...") - wait_until(CHECKPOINT_TIMEOUT_SECONDS * 2, 1, lambda: assert_dirty_bytes(env, 0)) + wait_until(lambda: assert_dirty_bytes(env, 0), timeout=2 * CHECKPOINT_TIMEOUT_SECONDS) # The code below verifies that we do not flush on the first write # after an idle period longer than the checkpoint timeout. @@ -210,7 +210,7 @@ def test_idle_checkpoints(neon_env_builder: NeonEnvBuilder): run_worker_for_tenant(env, 5, tenant_with_extra_writes, offset=ENTRIES_PER_TIMELINE) ) - dirty_after_write = wait_until(10, 1, lambda: assert_dirty_bytes_nonzero(env)) + dirty_after_write = wait_until(lambda: assert_dirty_bytes_nonzero(env)) # We shouldn't flush since we've just opened a new layer waited_for = 0 @@ -305,11 +305,11 @@ def assert_bytes_rolled(): # Wait until enough layers have rolled that the amount of dirty data is under the threshold. # We do this indirectly via layer maps, rather than the dirty bytes metric, to avoid false-passing # if that metric isn't updated quickly enough to reflect the dirty bytes exceeding the limit. 
- wait_until(compaction_period_s * 2, 1, assert_bytes_rolled) + wait_until(assert_bytes_rolled, timeout=2 * compaction_period_s) # The end state should also have the reported metric under the limit def assert_dirty_data_limited(): dirty_bytes = get_dirty_bytes(env) assert dirty_bytes < max_dirty_data - wait_until(compaction_period_s * 2, 1, lambda: assert_dirty_data_limited()) + wait_until(lambda: assert_dirty_data_limited(), timeout=2 * compaction_period_s) diff --git a/test_runner/regress/test_pageserver_restart.py b/test_runner/regress/test_pageserver_restart.py index 4bf570551731..835ccbd5d430 100644 --- a/test_runner/regress/test_pageserver_restart.py +++ b/test_runner/regress/test_pageserver_restart.py @@ -103,7 +103,7 @@ def assert_complete(): raise AssertionError("No 'complete' metric yet") - wait_until(30, 1.0, assert_complete) + wait_until(assert_complete) # Expectation callbacks: arg t is sample value, arg p is the previous phase's sample value expectations = [ diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index a264f4d3c9c2..1292682f9e3d 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -356,7 +356,7 @@ def caught_up(): ) assert destination_lsn >= origin_lsn - wait_until(100, 0.1, caught_up) + wait_until(caught_up) # The destination should accept writes workload.churn_rows(64, pageserver_b.id) @@ -411,7 +411,7 @@ def blocked_deletions_drained(): assert submitted is not None assert submitted > 0 - wait_until(10, 0.1, blocked_deletions_drained) + wait_until(blocked_deletions_drained) workload.churn_rows(64, pageserver_b.id) workload.validate(pageserver_b.id) @@ -702,7 +702,7 @@ def await_log(pageserver, deadline, expression): else: timeout = int(deadline - now) + 1 try: - wait_until(timeout, 1, lambda: pageserver.assert_log_contains(expression)) + wait_until(lambda: pageserver.assert_log_contains(expression), timeout=timeout) except: log.error(f"Timed out waiting for '{expression}'") raise diff --git a/test_runner/regress/test_proxy.py b/test_runner/regress/test_proxy.py index 5a01d90d8548..d8df2efc78f2 100644 --- a/test_runner/regress/test_proxy.py +++ b/test_runner/regress/test_proxy.py @@ -5,6 +5,7 @@ import subprocess import time import urllib.parse +from contextlib import closing from typing import TYPE_CHECKING import psycopg2 @@ -131,6 +132,24 @@ def test_proxy_options(static_proxy: NeonProxy, option_name: str): assert out[0][0] == " str" +@pytest.mark.asyncio +async def test_proxy_arbitrary_params(static_proxy: NeonProxy): + with closing( + await static_proxy.connect_async(server_settings={"IntervalStyle": "iso_8601"}) + ) as conn: + out = await conn.fetchval("select to_json('0 seconds'::interval)") + assert out == '"00:00:00"' + + options = "neon_proxy_params_compat:true" + with closing( + await static_proxy.connect_async( + server_settings={"IntervalStyle": "iso_8601", "options": options} + ) + ) as conn: + out = await conn.fetchval("select to_json('0 seconds'::interval)") + assert out == '"PT0S"' + + def test_auth_errors(static_proxy: NeonProxy): """ Check that we throw very specific errors in some unsuccessful auth scenarios. 
diff --git a/test_runner/regress/test_readonly_node.py b/test_runner/regress/test_readonly_node.py index 70d558ac5af0..c13bea7ee178 100644 --- a/test_runner/regress/test_readonly_node.py +++ b/test_runner/regress/test_readonly_node.py @@ -215,8 +215,6 @@ def trigger_gc_and_select( # wait for lease renewal before running query. _, offset = wait_until( - 20, - 0.5, lambda: ep_static.assert_log_contains( "lsn_lease_bg_task.*Request succeeded", offset=offset ), diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 137e75f78446..76a42ef4a2a2 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -300,9 +300,9 @@ def get_queued_count(file_kind, op_kind): print_gc_result(gc_result) assert gc_result["layers_removed"] > 0 - wait_until(2, 1, lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="upload"), 0)) - wait_until(2, 1, lambda: assert_eq(get_queued_count(file_kind="index", op_kind="upload"), 0)) - wait_until(2, 1, lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="delete"), 0)) + wait_until(lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="upload"), 0)) + wait_until(lambda: assert_eq(get_queued_count(file_kind="index", op_kind="upload"), 0)) + wait_until(lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="delete"), 0)) # let all future operations queue up configure_storage_sync_failpoints("return") @@ -333,16 +333,28 @@ def churn_while_failpoints_active(result): # wait for churn thread's data to get stuck in the upload queue # Exponential back-off in upload queue, so, gracious timeouts. - wait_until(30, 1, lambda: assert_gt(get_queued_count(file_kind="layer", op_kind="upload"), 0)) - wait_until(30, 1, lambda: assert_ge(get_queued_count(file_kind="index", op_kind="upload"), 1)) - wait_until(30, 1, lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="delete"), 0)) + wait_until( + lambda: assert_gt(get_queued_count(file_kind="layer", op_kind="upload"), 0), timeout=30 + ) + wait_until( + lambda: assert_ge(get_queued_count(file_kind="index", op_kind="upload"), 1), timeout=30 + ) + wait_until( + lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="delete"), 0), timeout=30 + ) # unblock churn operations configure_storage_sync_failpoints("off") - wait_until(30, 1, lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="upload"), 0)) - wait_until(30, 1, lambda: assert_eq(get_queued_count(file_kind="index", op_kind="upload"), 0)) - wait_until(30, 1, lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="delete"), 0)) + wait_until( + lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="upload"), 0), timeout=30 + ) + wait_until( + lambda: assert_eq(get_queued_count(file_kind="index", op_kind="upload"), 0), timeout=30 + ) + wait_until( + lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="delete"), 0), timeout=30 + ) # The churn thread doesn't make progress once it blocks on the first wait_completion() call, # so, give it some time to wrap up. @@ -580,7 +592,7 @@ def assert_compacted_and_uploads_queued(): > 0 ) - wait_until(200, 0.1, assert_compacted_and_uploads_queued) + wait_until(assert_compacted_and_uploads_queued) # Regardless, give checkpoint some time to block for good. # Not strictly necessary, but might help uncover failure modes in the future. 
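The recurring pattern in these test_runner hunks is the migration of the wait_until helper from the old positional form wait_until(number_of_iterations, interval, func) to a keyword form that takes the predicate first and expresses the retry budget as a single overall timeout plus an optional polling interval. The helper's actual defaults are not visible in this diff, so the following is only a minimal sketch of the assumed contract; timeout=20.0, interval=0.5, and status_interval=1.0 (the period between "still waiting" log lines, matching how status_interval is passed in test_timeline_delete.py further down) are all assumptions, not the library's confirmed values.

import time
from typing import Callable, TypeVar

T = TypeVar("T")


def wait_until(
    func: Callable[[], T],
    timeout: float = 20.0,  # assumed default; not confirmed by this diff
    interval: float = 0.5,  # assumed default
    status_interval: float = 1.0,  # assumed: how often to report progress while waiting
) -> T:
    """Call func() repeatedly until it stops raising, and return its value.

    If func() is still raising when the timeout elapses, re-raise its last exception.
    """
    deadline = time.monotonic() + timeout
    next_status = time.monotonic() + status_interval
    while True:
        try:
            return func()
        except Exception:
            now = time.monotonic()
            if now >= deadline:
                raise
            if now >= next_status:
                print(f"wait_until: still waiting ({deadline - now:.1f}s left)")
                next_status = now + status_interval
            time.sleep(interval)

Under that contract the conversions are behavior-preserving: an old call such as wait_until(30, 1, fn) becomes wait_until(fn, timeout=30), and callers whose old retries-times-interval budget fits inside the assumed default can drop the arguments entirely, which is what most hunks here do.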
@@ -598,9 +610,7 @@ def assert_compacted_and_uploads_queued(): ] ) - # Generous timeout, because currently deletions can get blocked waiting for compaction - # This can be reduced when https://github.com/neondatabase/neon/issues/4998 is fixed. - timeline_delete_wait_completed(client, tenant_id, timeline_id, iterations=30, interval=1) + timeline_delete_wait_completed(client, tenant_id, timeline_id) assert not timeline_path.exists() @@ -826,22 +836,16 @@ def wait_upload_queue_empty( client: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId ): wait_until( - 2, - 1, lambda: assert_eq( get_queued_count(client, tenant_id, timeline_id, file_kind="layer", op_kind="upload"), 0 ), ) wait_until( - 2, - 1, lambda: assert_eq( get_queued_count(client, tenant_id, timeline_id, file_kind="index", op_kind="upload"), 0 ), ) wait_until( - 2, - 1, lambda: assert_eq( get_queued_count(client, tenant_id, timeline_id, file_kind="layer", op_kind="delete"), 0 ), diff --git a/test_runner/regress/test_replica_start.py b/test_runner/regress/test_replica_start.py index 8e7c01f95029..e2a22cc769f7 100644 --- a/test_runner/regress/test_replica_start.py +++ b/test_runner/regress/test_replica_start.py @@ -378,7 +378,7 @@ def check_replica_crashed(): return None raise RuntimeError("connection succeeded") - wait_until(20, 0.5, check_replica_crashed) + wait_until(check_replica_crashed) assert secondary.log_contains("too many KnownAssignedXids") # Replica is crashed, so ignore stop result diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py index 411574bd8621..30abf91d3a6f 100644 --- a/test_runner/regress/test_sharding.py +++ b/test_runner/regress/test_sharding.py @@ -519,6 +519,13 @@ def test_sharding_split_smoke( # We will have 2 shards per pageserver once done (including secondaries) neon_env_builder.num_pageservers = split_shard_count + # Two AZs + def assign_az(ps_cfg): + az = f"az-{(ps_cfg['id'] - 1) % 2}" + ps_cfg["availability_zone"] = az + + neon_env_builder.pageserver_config_override = assign_az + # 1MiB stripes: enable getting some meaningful data distribution without # writing large quantities of data in this test. The stripe size is given # in number of 8KiB pages. @@ -836,7 +843,7 @@ def assert_restart_notification(): assert len(notifications) == 3 assert notifications[2] == expect_after - wait_until(10, 1, assert_restart_notification) + wait_until(assert_restart_notification) # The quantity of data isn't huge, but debug can be _very_ slow, and the things we're @@ -1025,7 +1032,7 @@ def assert_all_disk_consistent(): assert Lsn(timeline_detail["disk_consistent_lsn"]) >= expect_lsn # We set a short checkpoint timeout: expect things to get frozen+flushed within that - wait_until(checkpoint_interval_secs * 3, 1, assert_all_disk_consistent) + wait_until(assert_all_disk_consistent, timeout=3 * checkpoint_interval_secs) def assert_all_remote_consistent(): """ @@ -1037,7 +1044,7 @@ def assert_all_remote_consistent(): assert Lsn(timeline_detail["remote_consistent_lsn"]) >= expect_lsn # We set a short checkpoint timeout: expect things to get frozen+flushed within that - wait_until(checkpoint_interval_secs * 3, 1, assert_all_remote_consistent) + wait_until(assert_all_remote_consistent, timeout=3 * checkpoint_interval_secs) workload.validate() @@ -1405,14 +1412,14 @@ def finish_split(): # e.g. 
while waiting for a storage controller to re-attach a parent shard if we failed # inside the pageserver and the storage controller responds by detaching children and attaching # parents concurrently (https://github.com/neondatabase/neon/issues/7148) - wait_until(10, 1, lambda: workload.churn_rows(10, upload=False, ingest=False)) + wait_until(lambda: workload.churn_rows(10, upload=False, ingest=False)) workload.validate() if failure.fails_forward(env): log.info("Fail-forward failure, checking split eventually completes...") # A failure type which results in eventual completion of the split - wait_until(30, 1, assert_split_done) + wait_until(assert_split_done) elif failure.can_mitigate(): log.info("Mitigating failure...") # Mitigation phase: we expect to be able to proceed with a successful shard split @@ -1420,21 +1427,21 @@ def finish_split(): # The split should appear to be rolled back from the point of view of all pageservers # apart from the one that is offline - wait_until(30, 1, lambda: assert_rolled_back(exclude_ps_id=failure.pageserver_id)) + wait_until(lambda: assert_rolled_back(exclude_ps_id=failure.pageserver_id)) finish_split() - wait_until(30, 1, lambda: assert_split_done(exclude_ps_id=failure.pageserver_id)) + wait_until(lambda: assert_split_done(exclude_ps_id=failure.pageserver_id)) # Having cleared the failure, everything should converge to a pristine state failure.clear(env) - wait_until(30, 1, assert_split_done) + wait_until(assert_split_done) else: # Once we restore the faulty pageserver's API to good health, rollback should # eventually complete. log.info("Clearing failure...") failure.clear(env) - wait_until(30, 1, assert_rolled_back) + wait_until(assert_rolled_back) # Having rolled back, the tenant should be working workload.churn_rows(10) diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index 13bc54a1146d..9f74dcccb99e 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -154,7 +154,7 @@ def node_evacuated(node_id: int) -> None: counts = get_node_shard_counts(env, tenant_ids) assert counts[node_id] == 0 - wait_until(10, 1, lambda: node_evacuated(env.pageservers[0].id)) + wait_until(lambda: node_evacuated(env.pageservers[0].id)) # Let all the reconciliations after marking the node offline complete env.storage_controller.reconcile_until_idle() @@ -222,7 +222,7 @@ def test_node_status_after_restart( def is_ready(): assert env.storage_controller.ready() is True - wait_until(30, 1, is_ready) + wait_until(is_ready) # We loaded nodes from database on restart nodes = env.storage_controller.node_list() @@ -606,7 +606,7 @@ def node_evacuated(node_id: int) -> None: counts = get_node_shard_counts(env, [env.initial_tenant]) assert counts[node_id] == 0 - wait_until(10, 1, lambda: node_evacuated(env.pageservers[0].id)) + wait_until(lambda: node_evacuated(env.pageservers[0].id)) # Additional notification from migration log.info(f"notifications: {notifications}") @@ -620,7 +620,7 @@ def received_migration_notification(): assert len(notifications) == 2 assert notifications[1] == expect - wait_until(20, 0.25, received_migration_notification) + wait_until(received_migration_notification) # When we restart, we should re-emit notifications for all tenants env.storage_controller.stop() @@ -630,7 +630,7 @@ def received_restart_notification(): assert len(notifications) == 3 assert notifications[2] == expect - wait_until(10, 1, received_restart_notification) + 
wait_until(received_restart_notification) # Splitting a tenant should cause its stripe size to become visible in the compute notification env.storage_controller.tenant_shard_split(env.initial_tenant, shard_count=2) @@ -647,7 +647,7 @@ def received_split_notification(): assert len(notifications) == 4 assert notifications[3] == expect - wait_until(10, 1, received_split_notification) + wait_until(received_split_notification) # If the compute hook is unavailable, that should not block creating a tenant and # creating a timeline. This simulates a control plane refusing to accept notifications @@ -736,7 +736,7 @@ def handler(request: Request): def logged_stuck(): env.storage_controller.assert_log_contains(NOTIFY_BLOCKED_LOG) - wait_until(10, 0.25, logged_stuck) + wait_until(logged_stuck) contains_r = env.storage_controller.log_contains(NOTIFY_BLOCKED_LOG) assert contains_r is not None # Appease mypy (_, log_cursor) = contains_r @@ -764,7 +764,7 @@ def logged_stuck(): def logged_stuck_again(): env.storage_controller.assert_log_contains(NOTIFY_BLOCKED_LOG, offset=log_cursor) - wait_until(10, 0.25, logged_stuck_again) + wait_until(logged_stuck_again) assert migrate_fut.running() # This time, the compute hook remains stuck, but we mark the origin node offline: this should @@ -865,7 +865,7 @@ def notified_ps(ps_id: int) -> None: assert latest["shards"] is not None assert latest["shards"][0]["node_id"] == ps_id - wait_until(30, 1, lambda: notified_ps(pageserver_a.id)) + wait_until(lambda: notified_ps(pageserver_a.id)) env.storage_controller.allowed_errors.append(NOTIFY_BLOCKED_LOG) env.storage_controller.allowed_errors.extend(NOTIFY_FAILURE_LOGS) @@ -880,7 +880,7 @@ def notified_ps(ps_id: int) -> None: # Although the migration API failed, the hook should still see pageserver B (it remembers what # was posted even when returning an error code) - wait_until(30, 1, lambda: notified_ps(pageserver_b.id)) + wait_until(lambda: notified_ps(pageserver_b.id)) # Although the migration API failed, the tenant should still have moved to the right pageserver assert len(pageserver_b.http_client().tenant_list()) == 1 @@ -898,7 +898,7 @@ def notified_ps(ps_id: int) -> None: def logged_giving_up(): env.storage_controller.assert_log_contains(".*Giving up on compute notification.*") - wait_until(30, 1, logged_giving_up) + wait_until(logged_giving_up) pageserver_a.start() @@ -919,7 +919,7 @@ def logged_giving_up(): handle_params["status"] = 200 env.storage_controller.tenant_shard_migrate(tenant_shard_id, pageserver_a.id) - wait_until(30, 1, lambda: notified_ps(pageserver_a.id)) + wait_until(lambda: notified_ps(pageserver_a.id)) def test_storage_controller_debug_apis(neon_env_builder: NeonEnvBuilder): @@ -1453,7 +1453,7 @@ def tenants_placed(): # Check that each node got one tenant assert all(len(ts) == 1 for ts in node_to_tenants.values()) - wait_until(10, 1, tenants_placed) + wait_until(tenants_placed) # ... then we apply the failure offline_node_ids = set(failure.nodes()) @@ -1476,7 +1476,7 @@ def nodes_offline(): assert node["availability"] == "Offline" start = time.time() - wait_until(failure.offline_timeout, 1, nodes_offline) + wait_until(nodes_offline, timeout=failure.offline_timeout) detected_after = time.time() - start log.info(f"Detected node failures after {detected_after}s") @@ -1497,7 +1497,7 @@ def tenant_migrated(): assert observed_tenants == set(tenant_ids) - wait_until(10, 1, tenant_migrated) + wait_until(tenant_migrated) # ... 
then we clear the failure failure.clear(env) @@ -1509,7 +1509,7 @@ def nodes_online(): if node["id"] in online_node_ids: assert node["availability"] == "Active" - wait_until(10, 1, nodes_online) + wait_until(nodes_online) time.sleep(5) @@ -1562,7 +1562,7 @@ def failed_over(): # We could pre-empty this by configuring the node to Offline, but it's preferable to test # the realistic path we would take when a node restarts uncleanly. # The delay here will be ~NEON_LOCAL_MAX_UNAVAILABLE_INTERVAL in neon_local - wait_until(30, 1, failed_over) + wait_until(failed_over) reconciles_before_restart = env.storage_controller.get_metric_value( "storage_controller_reconcile_complete_total", filter={"status": "ok"} @@ -1640,12 +1640,12 @@ def assert_errors_gt(n) -> int: assert e > n return e - errs = wait_until(10, 1, lambda: assert_errors_gt(0)) + errs = wait_until(lambda: assert_errors_gt(0)) # Try reconciling again, it should fail again with pytest.raises(StorageControllerApiException): env.storage_controller.reconcile_all() - errs = wait_until(10, 1, lambda: assert_errors_gt(errs)) + errs = wait_until(lambda: assert_errors_gt(errs)) # Configure the tenant to disable reconciles env.storage_controller.tenant_policy_update( @@ -1674,7 +1674,7 @@ def assert_ok_gt(n) -> int: return o # We should see a successful reconciliation - wait_until(10, 1, lambda: assert_ok_gt(0)) + wait_until(lambda: assert_ok_gt(0)) # And indeed the tenant should be attached assert len(env.pageserver.http_client().tenant_list_locations()["tenant_shards"]) == 1 @@ -1747,8 +1747,8 @@ def storcon_cli(args): # Describe a tenant tenant_lines = storcon_cli(["tenant-describe", "--tenant-id", str(env.initial_tenant)]) - assert len(tenant_lines) == 3 + shard_count * 2 - assert str(env.initial_tenant) in tenant_lines[3] + assert len(tenant_lines) >= 3 + shard_count * 2 + assert str(env.initial_tenant) in tenant_lines[0] # Pause changes on a tenant storcon_cli(["tenant-policy", "--tenant-id", str(env.initial_tenant), "--scheduling", "stop"]) @@ -2073,7 +2073,7 @@ def secondary_is_lagging(): raise Exception(f"Secondary lag not big enough: {lag}") log.info(f"Looking for lag to develop on the secondary {secondary}") - wait_until(10, 1, secondary_is_lagging) + wait_until(secondary_is_lagging) log.info(f"Starting drain of primary {primary} with laggy secondary {secondary}") env.storage_controller.retryable_node_operation( @@ -2107,7 +2107,7 @@ def lag_is_acceptable(): if lag > 1 * 1024 * 1024: raise Exception(f"Secondary lag not big enough: {lag}") - wait_until(10, 1, lag_is_acceptable) + wait_until(lag_is_acceptable) env.storage_controller.node_configure(primary, {"scheduling": "Active"}) @@ -2227,7 +2227,7 @@ def assert_shards_migrated(): log.info(f"Shards on nodes other than on victim: {elsewhere}") assert elsewhere == tenant_count * shard_count_per_tenant - wait_until(30, 1, assert_shards_migrated) + wait_until(assert_shards_migrated) log.info(f"Deleting pageserver {victim.id}") env.storage_controller.node_delete(victim.id) @@ -2240,7 +2240,7 @@ def assert_victim_evacuated(): log.info(f"Shards on node {victim.id}: {count}") assert count == 0 - wait_until(30, 1, assert_victim_evacuated) + wait_until(assert_victim_evacuated) # The node should be gone from the list API assert victim.id not in [n["id"] for n in env.storage_controller.node_list()] @@ -2253,12 +2253,7 @@ def assert_victim_evacuated(): assert victim.id not in shard["node_secondary"] # Reconciles running during deletion should all complete - # FIXME: this currently doesn't work 
because the deletion schedules shards without a proper ScheduleContext, resulting - # in states that background_reconcile wants to optimize, but can't proceed with migrations yet because this is a short3 - # test that hasn't uploaded any heatmaps for secondaries. - # In the interim, just do a reconcile_all to enable the consistency check. - # env.storage_controller.reconcile_until_idle() - env.storage_controller.reconcile_all() + env.storage_controller.reconcile_until_idle() # Controller should pass its own consistency checks env.storage_controller.consistency_check() @@ -2267,7 +2262,6 @@ def assert_victim_evacuated(): env.storage_controller.stop() env.storage_controller.start() assert victim.id not in [n["id"] for n in env.storage_controller.node_list()] - env.storage_controller.reconcile_all() # FIXME: workaround for optimizations happening on startup, see FIXME above. env.storage_controller.consistency_check() @@ -2569,7 +2563,7 @@ def previous_stepped_down(): == StorageControllerLeadershipStatus.STEPPED_DOWN ) - wait_until(5, 1, previous_stepped_down) + wait_until(previous_stepped_down) storage_controller_proxy.route_to(f"http://127.0.0.1:{storage_controller_2_port}") @@ -2579,7 +2573,7 @@ def new_becomes_leader(): == StorageControllerLeadershipStatus.LEADER ) - wait_until(15, 1, new_becomes_leader) + wait_until(new_becomes_leader) leader = env.storage_controller.get_leader() assert leader["address"] == f"http://127.0.0.1:{storage_controller_2_port}/" @@ -2624,7 +2618,7 @@ def attached_is_draining(): env.storage_controller.configure_failpoints(("sleepy-drain-loop", "return(10000)")) env.storage_controller.node_drain(attached.id) - wait_until(10, 0.5, attached_is_draining) + wait_until(attached_is_draining) attached.restart() @@ -2646,7 +2640,7 @@ def reconfigure_node_again(): env.storage_controller.node_configure(attached.id, {"scheduling": "Pause"}) # allow for small delay between actually having cancelled and being able reconfigure again - wait_until(4, 0.5, reconfigure_node_again) + wait_until(reconfigure_node_again) def test_storage_controller_timeline_crud_race(neon_env_builder: NeonEnvBuilder): @@ -2691,7 +2685,7 @@ def has_hit_failpoint(): ps.log_contains(f"at failpoint {failpoint}") is not None for ps in env.pageservers ) - wait_until(10, 1, has_hit_failpoint) + wait_until(has_hit_failpoint) # Migrate the tenant while the timeline creation is in progress: this migration will complete once it # can detach from the old pageserver, which will happen once the failpoint completes. @@ -2775,7 +2769,7 @@ def test_storage_controller_validate_during_migration(neon_env_builder: NeonEnvB def has_hit_compaction_failpoint(): assert origin_pageserver.log_contains(f"at failpoint {compaction_failpoint}") - wait_until(10, 1, has_hit_compaction_failpoint) + wait_until(has_hit_compaction_failpoint) # While the compaction is running, start a live migration which will pause long enough for the compaction to sleep, # after incrementing generation and attaching the new location @@ -2794,7 +2788,7 @@ def has_hit_migration_failpoint(): # before it reaches this point. The timeout is because the AttachedStale transition includes # a flush of remote storage, and if the compaction already enqueued an index upload this cannot # make progress. - wait_until(60, 1, has_hit_migration_failpoint) + wait_until(has_hit_migration_failpoint, timeout=60) # Origin pageserver has succeeded with compaction before the migration completed. 
It has done all the writes it wanted to do in its own (stale) generation origin_pageserver.http_client().configure_failpoints((compaction_failpoint, "off")) @@ -2917,7 +2911,7 @@ def has_hit_migration_failpoint(): log.info(expr) assert env.storage_controller.log_contains(expr) - wait_until(10, 1, has_hit_migration_failpoint) + wait_until(has_hit_migration_failpoint) # This request should be routed to whichever pageserver holds the highest generation tenant_info = env.storage_controller.pageserver_api().tenant_status( @@ -2934,7 +2928,7 @@ def has_hit_migration_failpoint(): # We expect request to land on the origin assert tenant_info["generation"] == 1 - wait_until(10, 1, long_migration_metric_published) + wait_until(long_migration_metric_published) # Eventually migration completes env.storage_controller.configure_failpoints((migration_failpoint.value, "off")) @@ -3063,7 +3057,11 @@ def assign_az(ps_cfg): for shard in shards: attached_to = shard["node_attached"] expected_az = env.get_pageserver(attached_to).az_id - assert shard["preferred_az_id"] == expected_az + + # The scheduling optimization logic is not yet AZ-aware, so doesn't succeed + # in putting the tenant shards in the preferred AZ. + # To be fixed in https://github.com/neondatabase/neon/pull/9916 + # assert shard["preferred_az_id"] == expected_az @run_only_on_default_postgres("Postgres version makes no difference here") @@ -3113,7 +3111,7 @@ def has_hit_migration_failpoint(): log.info(expr) assert env.storage_controller.log_contains(expr) - wait_until(10, 1, has_hit_migration_failpoint) + wait_until(has_hit_migration_failpoint) env.storage_controller.pageserver_api().timeline_delete( tenant_id=tenant_id, timeline_id=timeline_id @@ -3182,7 +3180,7 @@ def has_hit_migration_failpoint(): log.info(expr) assert env.storage_controller.log_contains(expr) - wait_until(10, 1, has_hit_migration_failpoint) + wait_until(has_hit_migration_failpoint) timeline_id = TimelineId.generate() env.storage_controller.pageserver_api().timeline_create( @@ -3232,3 +3230,55 @@ def has_hit_migration_failpoint(): # Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown env.storage_controller.configure_failpoints((migration_failpoint.value, "off")) raise + + +@run_only_on_default_postgres("Postgres version makes no difference here") +def test_storage_controller_detached_stopped( + neon_env_builder: NeonEnvBuilder, +): + """ + Test that detaching a tenant while it has scheduling policy set to Paused or Stop works + """ + + remote_storage_kind = s3_storage() + neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind) + + neon_env_builder.num_pageservers = 1 + + env = neon_env_builder.init_configs() + env.start() + virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True) + + tenant_id = TenantId.generate() + env.storage_controller.tenant_create( + tenant_id, + shard_count=1, + ) + + assert len(env.pageserver.http_client().tenant_list_locations()["tenant_shards"]) == 1 + + # Disable scheduling: ordinarily this would prevent the tenant's configuration being + # reconciled to pageservers, but this should be overridden when detaching. 
+ env.storage_controller.allowed_errors.append(".*Scheduling is disabled by policy.*") + env.storage_controller.tenant_policy_update( + tenant_id, + {"scheduling": "Stop"}, + ) + + env.storage_controller.consistency_check() + + # Detach the tenant + virtual_ps_http.tenant_location_conf( + tenant_id, + { + "mode": "Detached", + "secondary_conf": None, + "tenant_conf": {}, + "generation": None, + }, + ) + + env.storage_controller.consistency_check() + + # Confirm the detach happened + assert env.pageserver.http_client().tenant_list_locations()["tenant_shards"] == [] diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index 3991bd7061b9..b16dc54c248b 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -431,8 +431,6 @@ def stuck_split(): # Let the controller reach the failpoint wait_until( - 10, - 1, lambda: env.storage_controller.assert_log_contains( 'failpoint "shard-split-post-remote-sleep": sleeping' ), diff --git a/test_runner/regress/test_subscriber_restart.py b/test_runner/regress/test_subscriber_restart.py index d37eeb1e6ebb..7d4f66d04448 100644 --- a/test_runner/regress/test_subscriber_restart.py +++ b/test_runner/regress/test_subscriber_restart.py @@ -56,4 +56,4 @@ def insert_data(pub): pcur.execute(f"INSERT into t values ({n_records}, 0)") n_records += 1 with sub.cursor() as scur: - wait_until(60, 0.5, check_that_changes_propagated) + wait_until(check_that_changes_propagated) diff --git a/test_runner/regress/test_tenant_conf.py b/test_runner/regress/test_tenant_conf.py index 1dd46ec3d111..f8f240cfdcc4 100644 --- a/test_runner/regress/test_tenant_conf.py +++ b/test_runner/regress/test_tenant_conf.py @@ -234,11 +234,7 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder): assert not config_path.exists(), "detach did not remove config file" env.pageserver.tenant_attach(tenant_id) - wait_until( - number_of_iterations=5, - interval=1, - func=lambda: assert_tenant_state(http_client, tenant_id, "Active"), - ) + wait_until(lambda: assert_tenant_state(http_client, tenant_id, "Active")) env.config_tenant(tenant_id, {"gc_horizon": "1000000"}) contents_first = config_path.read_text() diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py index 47df3ead7020..48e55c1ab15b 100644 --- a/test_runner/regress/test_tenant_delete.py +++ b/test_runner/regress/test_tenant_delete.py @@ -185,21 +185,21 @@ def tenant_is_deleted(): deletion = None try: - wait_until(10, 1, has_hit_failpoint) + wait_until(has_hit_failpoint) # it should start ok, sync up with the stuck creation, then hang waiting for the timeline # to shut down. 
deletion = Thread(target=start_deletion) deletion.start() - wait_until(10, 1, deletion_has_started_waiting_for_timelines) + wait_until(deletion_has_started_waiting_for_timelines) pageserver_http.configure_failpoints((failpoint, "off")) creation.join() deletion.join() - wait_until(10, 1, tenant_is_deleted) + wait_until(tenant_is_deleted) finally: creation.join() if deletion is not None: @@ -264,7 +264,7 @@ def timeline_create(): def hit_initdb_upload_failpoint(): env.pageserver.assert_log_contains(f"at failpoint {BEFORE_INITDB_UPLOAD_FAILPOINT}") - wait_until(100, 0.1, hit_initdb_upload_failpoint) + wait_until(hit_initdb_upload_failpoint) def creation_connection_timed_out(): env.pageserver.assert_log_contains( @@ -273,7 +273,7 @@ def creation_connection_timed_out(): # Wait so that we hit the timeout and the connection is dropped # (But timeline creation still continues) - wait_until(100, 0.1, creation_connection_timed_out) + wait_until(creation_connection_timed_out) ps_http.configure_failpoints((DELETE_BEFORE_CLEANUP_FAILPOINT, "pause")) @@ -281,7 +281,7 @@ def tenant_delete(): def tenant_delete_inner(): ps_http.tenant_delete(tenant_id) - wait_until(100, 0.5, tenant_delete_inner) + wait_until(tenant_delete_inner) Thread(target=tenant_delete).start() @@ -290,7 +290,7 @@ def deletion_arrived(): f"cfg failpoint: {DELETE_BEFORE_CLEANUP_FAILPOINT} pause" ) - wait_until(100, 0.1, deletion_arrived) + wait_until(deletion_arrived) ps_http.configure_failpoints((DELETE_BEFORE_CLEANUP_FAILPOINT, "off")) diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index 8d7ca7bc4e14..3f21dc895a3b 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -212,7 +212,7 @@ async def sleep_and_reattach(pageserver_http: PageserverHttpClient, tenant_id: T nonlocal updates_started, updates_finished, updates_to_perform # Wait until we have performed some updates - wait_until(20, 0.5, lambda: updates_finished > 500) + wait_until(lambda: updates_finished > 500) log.info("Detaching tenant") pageserver_http.tenant_detach(tenant_id) @@ -512,7 +512,7 @@ def found_broken(): ) assert only_int(active) == 0 and only_int(broken) == 1 and only_int(broken_set) == 1 - wait_until(10, 0.5, found_broken) + wait_until(found_broken) client.tenant_detach(env.initial_tenant) @@ -524,7 +524,7 @@ def found_cleaned_up(): ) assert only_int(broken) == 0 and len(broken_set) == 0 - wait_until(10, 0.5, found_cleaned_up) + wait_until(found_cleaned_up) env.pageserver.tenant_attach(env.initial_tenant) @@ -536,4 +536,4 @@ def found_active(): ) assert only_int(active) == 1 and len(broken_set) == 0 - wait_until(10, 0.5, found_active) + wait_until(found_active) diff --git a/test_runner/regress/test_tenant_relocation.py b/test_runner/regress/test_tenant_relocation.py index bf6120aa0aab..df53a98e926c 100644 --- a/test_runner/regress/test_tenant_relocation.py +++ b/test_runner/regress/test_tenant_relocation.py @@ -298,11 +298,7 @@ def test_tenant_relocation( destination_ps.tenant_attach(tenant_id) # wait for tenant to finish attaching - wait_until( - number_of_iterations=10, - interval=1, - func=lambda: assert_tenant_state(destination_http, tenant_id, "Active"), - ) + wait_until(lambda: assert_tenant_state(destination_http, tenant_id, "Active")) check_timeline_attached( destination_http, diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 8b733da0c67f..713f89c60f6a 100644 --- a/test_runner/regress/test_tenant_size.py 
+++ b/test_runner/regress/test_tenant_size.py @@ -638,7 +638,7 @@ def test_synthetic_size_while_deleting(neon_env_builder: NeonEnvBuilder): with ThreadPoolExecutor(max_workers=1) as exec: completion = exec.submit(client.tenant_size, env.initial_tenant) _, last_offset = wait_until( - 10, 1.0, lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}") + lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}") ) timeline_delete_wait_completed(client, env.initial_tenant, branch_id) @@ -656,8 +656,6 @@ def test_synthetic_size_while_deleting(neon_env_builder: NeonEnvBuilder): with ThreadPoolExecutor(max_workers=1) as exec: completion = exec.submit(client.tenant_size, env.initial_tenant) wait_until( - 10, - 1.0, lambda: env.pageserver.assert_log_contains( f"at failpoint {failpoint}", offset=last_offset ), diff --git a/test_runner/regress/test_tenant_tasks.py b/test_runner/regress/test_tenant_tasks.py index 72183f5778b9..4c26b64d22b9 100644 --- a/test_runner/regress/test_tenant_tasks.py +++ b/test_runner/regress/test_tenant_tasks.py @@ -77,4 +77,4 @@ def assert_tasks_finish(): assert tasks_started == tasks_ended assert tasks_panicked is None or int(tasks_panicked) == 0 - wait_until(10, 0.2, assert_tasks_finish) + wait_until(assert_tasks_finish) diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 158c3fddb0b3..d31901b384e5 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -330,7 +330,7 @@ def not_attaching(): assert len(tenants) == 1 assert all(t["state"]["slug"] != "Attaching" for t in tenants) - wait_until(10, 0.2, not_attaching) + wait_until(not_attaching) tenants = client.tenant_list() diff --git a/test_runner/regress/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py index 8d3ddf7e54a5..6b27c41d1c3b 100644 --- a/test_runner/regress/test_tenants_with_remote_storage.py +++ b/test_runner/regress/test_tenants_with_remote_storage.py @@ -178,11 +178,7 @@ def test_tenants_attached_after_download(neon_env_builder: NeonEnvBuilder): env.pageserver.start() client = env.pageserver.http_client() - wait_until( - number_of_iterations=5, - interval=1, - func=lambda: assert_tenant_state(client, tenant_id, "Active"), - ) + wait_until(lambda: assert_tenant_state(client, tenant_id, "Active")) restored_timelines = client.timeline_list(tenant_id) assert ( @@ -257,11 +253,7 @@ def test_tenant_redownloads_truncated_file_on_startup( env.pageserver.start() client = env.pageserver.http_client() - wait_until( - number_of_iterations=5, - interval=1, - func=lambda: assert_tenant_state(client, tenant_id, "Active"), - ) + wait_until(lambda: assert_tenant_state(client, tenant_id, "Active")) restored_timelines = client.timeline_list(tenant_id) assert ( diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index bc2e048f6942..e808dd13966c 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -227,8 +227,8 @@ def leaf_offloaded(): ps_http.timeline_offload(tenant_id=tenant_id, timeline_id=leaf_timeline_id) assert timeline_offloaded_logged(leaf_timeline_id) - wait_until(30, 1, leaf_offloaded) - wait_until(30, 1, parent_offloaded) + wait_until(leaf_offloaded) + wait_until(parent_offloaded) # Offloaded child timelines should still prevent deletion with pytest.raises( @@ -331,7 +331,7 @@ def child_offloaded(): ps_http.timeline_offload(tenant_id=tenant_id, 
timeline_id=child_timeline_id) assert timeline_offloaded_api(child_timeline_id) - wait_until(30, 1, child_offloaded) + wait_until(child_offloaded) assert timeline_offloaded_api(child_timeline_id) assert not timeline_offloaded_api(root_timeline_id) @@ -835,3 +835,117 @@ def test_timeline_retain_lsn( with env.endpoints.create_start("test_archived_branch", tenant_id=tenant_id) as endpoint: sum = endpoint.safe_psql("SELECT sum(key) from foo where v < 51200") assert sum == pre_branch_sum + + +def test_timeline_offload_generations(neon_env_builder: NeonEnvBuilder): + """ + Test for scrubber deleting old generations of manifests + """ + remote_storage_kind = s3_storage() + neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind) + + env = neon_env_builder.init_start() + ps_http = env.pageserver.http_client() + + # Turn off gc and compaction loops: we want to issue them manually for better reliability + tenant_id, root_timeline_id = env.create_tenant( + conf={ + "gc_period": "0s", + "compaction_period": "0s", + "checkpoint_distance": f"{1024 ** 2}", + } + ) + + # Create a branch and archive it + child_timeline_id = env.create_branch("test_archived_branch_persisted", tenant_id) + + with env.endpoints.create_start( + "test_archived_branch_persisted", tenant_id=tenant_id + ) as endpoint: + endpoint.safe_psql_many( + [ + "CREATE TABLE foo(key serial primary key, t text default 'data_content')", + "INSERT INTO foo SELECT FROM generate_series(1,512)", + ] + ) + sum = endpoint.safe_psql("SELECT sum(key) from foo where key % 3 = 2") + last_flush_lsn_upload(env, endpoint, tenant_id, child_timeline_id) + + assert_prefix_not_empty( + neon_env_builder.pageserver_remote_storage, + prefix=f"tenants/{str(tenant_id)}/", + ) + assert_prefix_empty( + neon_env_builder.pageserver_remote_storage, + prefix=f"tenants/{str(tenant_id)}/tenant-manifest", + ) + + ps_http.timeline_archival_config( + tenant_id, + child_timeline_id, + state=TimelineArchivalState.ARCHIVED, + ) + + def timeline_offloaded_api(timeline_id: TimelineId) -> bool: + # TODO add a proper API to check if a timeline has been offloaded or not + return not any( + timeline["timeline_id"] == str(timeline_id) + for timeline in ps_http.timeline_list(tenant_id=tenant_id) + ) + + def child_offloaded(): + ps_http.timeline_offload(tenant_id=tenant_id, timeline_id=child_timeline_id) + assert timeline_offloaded_api(child_timeline_id) + + wait_until(child_offloaded) + + assert timeline_offloaded_api(child_timeline_id) + assert not timeline_offloaded_api(root_timeline_id) + + # Reboot the pageserver a bunch of times, do unoffloads, offloads + for i in range(5): + env.pageserver.stop() + env.pageserver.start() + + assert timeline_offloaded_api(child_timeline_id) + assert not timeline_offloaded_api(root_timeline_id) + + ps_http.timeline_archival_config( + tenant_id, + child_timeline_id, + state=TimelineArchivalState.UNARCHIVED, + ) + + assert not timeline_offloaded_api(child_timeline_id) + + if i % 2 == 0: + with env.endpoints.create_start( + "test_archived_branch_persisted", tenant_id=tenant_id + ) as endpoint: + sum_again = endpoint.safe_psql("SELECT sum(key) from foo where key % 3 = 2") + assert sum == sum_again + + ps_http.timeline_archival_config( + tenant_id, + child_timeline_id, + state=TimelineArchivalState.ARCHIVED, + ) + wait_until(child_offloaded) + + # + # Now ensure that scrubber runs will clean up old generations' manifests. 
+ # + + # Sleep some amount larger than min_age_secs + time.sleep(3) + + # Ensure that min_age_secs has a deletion impeding effect + gc_summary = env.storage_scrubber.pageserver_physical_gc(min_age_secs=3600, mode="full") + assert gc_summary["remote_storage_errors"] == 0 + assert gc_summary["indices_deleted"] == 0 + assert gc_summary["tenant_manifests_deleted"] == 0 + + gc_summary = env.storage_scrubber.pageserver_physical_gc(min_age_secs=1, mode="full") + assert gc_summary["remote_storage_errors"] == 0 + assert gc_summary["indices_deleted"] > 0 + assert gc_summary["tenant_manifests_deleted"] > 0 diff --git a/test_runner/regress/test_timeline_delete.py b/test_runner/regress/test_timeline_delete.py index 155709e1066d..fbece6836729 100644 --- a/test_runner/regress/test_timeline_delete.py +++ b/test_runner/regress/test_timeline_delete.py @@ -21,7 +21,6 @@ assert_prefix_empty, assert_prefix_not_empty, many_small_layers_tenant_config, - poll_for_remote_storage_iterations, timeline_delete_wait_completed, wait_for_last_record_lsn, wait_for_upload, @@ -94,12 +93,7 @@ def test_timeline_delete(neon_simple_env: NeonEnv): assert timeline_path.exists() # retry deletes when compaction or gc is running in pageserver - # TODO: review whether this wait_until is actually necessary, we do an await() internally - wait_until( - number_of_iterations=3, - interval=0.2, - func=lambda: timeline_delete_wait_completed(ps_http, env.initial_tenant, leaf_timeline_id), - ) + timeline_delete_wait_completed(ps_http, env.initial_tenant, leaf_timeline_id) assert not timeline_path.exists() @@ -111,13 +105,7 @@ def test_timeline_delete(neon_simple_env: NeonEnv): ps_http.timeline_detail(env.initial_tenant, leaf_timeline_id) assert exc.value.status_code == 404 - wait_until( - number_of_iterations=3, - interval=0.2, - func=lambda: timeline_delete_wait_completed( - ps_http, env.initial_tenant, parent_timeline_id - ), - ) + timeline_delete_wait_completed(ps_http, env.initial_tenant, parent_timeline_id) # Check that we didn't pick up the timeline again after restart. # See https://github.com/neondatabase/neon/issues/3560 @@ -226,8 +214,6 @@ def test_delete_timeline_exercise_crash_safety_failpoints( ps_http.configure_failpoints((failpoint, "return")) - iterations = poll_for_remote_storage_iterations(remote_storage_kind) - # These failpoints are earlier than background task is spawned. # so they result in api request failure. if failpoint in ( @@ -244,7 +230,7 @@ def test_delete_timeline_exercise_crash_safety_failpoints( tenant_id=env.initial_tenant, timeline_id=timeline_id, expected_state="Broken", - iterations=iterations, + iterations=40, ) reason = timeline_info["state"]["Broken"]["reason"] @@ -257,25 +243,21 @@ def test_delete_timeline_exercise_crash_safety_failpoints( env.pageserver.stop() env.pageserver.start() - wait_until_tenant_active(ps_http, env.initial_tenant, iterations=iterations) + wait_until_tenant_active(ps_http, env.initial_tenant) if failpoint == "timeline-delete-before-index-deleted-at": # We crashed before persisting this to remote storage, need to retry delete request timeline_delete_wait_completed(ps_http, env.initial_tenant, timeline_id) else: # Pageserver should've resumed deletion after restart. 
-        wait_timeline_detail_404(
-            ps_http, env.initial_tenant, timeline_id, iterations=iterations
-        )
+        wait_timeline_detail_404(ps_http, env.initial_tenant, timeline_id)
 
     elif check is Check.RETRY_WITHOUT_RESTART:
         # this should succeed
         # this also checks that delete can be retried even when timeline is in Broken state
         ps_http.configure_failpoints((failpoint, "off"))
 
-        timeline_delete_wait_completed(
-            ps_http, env.initial_tenant, timeline_id, iterations=iterations
-        )
+        timeline_delete_wait_completed(ps_http, env.initial_tenant, timeline_id)
 
     # Check remote is empty
     if remote_storage_kind is RemoteStorageKind.MOCK_S3:
@@ -378,7 +360,7 @@ def test_timeline_resurrection_on_attach(
 
     env.pageserver.tenant_attach(tenant_id=tenant_id)
 
-    wait_until_tenant_active(ps_http, tenant_id=tenant_id, iterations=10, period=0.5)
+    wait_until_tenant_active(ps_http, tenant_id=tenant_id)
 
     timelines = ps_http.timeline_list(tenant_id=tenant_id)
     assert {TimelineId(tl["timeline_id"]) for tl in timelines} == {
@@ -439,7 +421,7 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
     # Wait for tenant to finish loading.
     wait_until_tenant_active(ps_http, tenant_id=env.initial_tenant, iterations=10, period=1)
 
-    wait_timeline_detail_404(ps_http, env.initial_tenant, leaf_timeline_id, iterations=4)
+    wait_timeline_detail_404(ps_http, env.initial_tenant, leaf_timeline_id)
 
     assert (
         not leaf_timeline_path.exists()
@@ -481,11 +463,10 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
     )
 
     # for some reason the check above doesnt immediately take effect for the below.
-    # Assume it is mock server incosistency and check twice.
+    # Assume it is mock server inconsistency and check a few times.
     wait_until(
-        2,
-        0.5,
         lambda: assert_prefix_empty(neon_env_builder.pageserver_remote_storage),
+        timeout=2,
     )
 
     # We deleted our only tenant, and the scrubber fails if it detects nothing
@@ -544,7 +525,7 @@ def first_call_hit_failpoint():
             f".*{child_timeline_id}.*at failpoint {stuck_failpoint}"
         )
 
-    wait_until(50, 0.1, first_call_hit_failpoint)
+    wait_until(first_call_hit_failpoint, interval=0.1, status_interval=1.0)
 
     # make the second call and assert behavior
     log.info("second call start")
@@ -613,7 +594,7 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):
     def hit_failpoint():
         env.pageserver.assert_log_contains(at_failpoint_log_message)
 
-    wait_until(50, 0.1, hit_failpoint)
+    wait_until(hit_failpoint, interval=0.1)
 
     # we log this error if a client hangs up
     # might as well use it as another indicator that the test works
@@ -623,7 +604,7 @@ def hit_failpoint():
     def got_hangup_log_message():
         env.pageserver.assert_log_contains(hangup_log_message)
 
-    wait_until(50, 0.1, got_hangup_log_message)
+    wait_until(got_hangup_log_message, interval=0.1)
 
     # check that the timeline is still present
     ps_http.timeline_detail(env.initial_tenant, child_timeline_id)
@@ -635,10 +616,10 @@ def first_request_finished():
         message = f".*DELETE.*{child_timeline_id}.*Cancelled request finished"
         env.pageserver.assert_log_contains(message)
 
-    wait_until(50, 0.1, first_request_finished)
+    wait_until(first_request_finished, interval=0.1)
 
     # check that the timeline is gone
-    wait_timeline_detail_404(ps_http, env.initial_tenant, child_timeline_id, iterations=10)
+    wait_timeline_detail_404(ps_http, env.initial_tenant, child_timeline_id)
 
 
 def test_timeline_delete_works_for_remote_smoke(
@@ -707,7 +688,7 @@ def test_timeline_delete_works_for_remote_smoke(
     # for some reason the check above doesnt immediately take
effect for the below. # Assume it is mock server inconsistency and check twice. - wait_until(2, 0.5, lambda: assert_prefix_empty(neon_env_builder.pageserver_remote_storage)) + wait_until(lambda: assert_prefix_empty(neon_env_builder.pageserver_remote_storage)) # We deleted our only tenant, and the scrubber fails if it detects nothing neon_env_builder.disable_scrub_on_exit() @@ -753,15 +734,13 @@ def test_delete_orphaned_objects( env.pageserver.allowed_errors.append(f".*failpoint: {failpoint}") - iterations = poll_for_remote_storage_iterations(remote_storage_kind) - ps_http.timeline_delete(env.initial_tenant, timeline_id) timeline_info = wait_until_timeline_state( pageserver_http=ps_http, tenant_id=env.initial_tenant, timeline_id=timeline_id, expected_state="Broken", - iterations=iterations, + iterations=40, ) reason = timeline_info["state"]["Broken"]["reason"] @@ -827,8 +806,6 @@ def test_timeline_delete_resumed_on_attach( ) ) - iterations = poll_for_remote_storage_iterations(remote_storage_kind) - ps_http.timeline_delete(tenant_id, timeline_id) timeline_info = wait_until_timeline_state( @@ -836,7 +813,7 @@ def test_timeline_delete_resumed_on_attach( tenant_id=env.initial_tenant, timeline_id=timeline_id, expected_state="Broken", - iterations=iterations, + iterations=40, ) reason = timeline_info["state"]["Broken"]["reason"] @@ -871,7 +848,7 @@ def test_timeline_delete_resumed_on_attach( env.pageserver.tenant_attach(tenant_id=tenant_id) # delete should be resumed - wait_timeline_detail_404(ps_http, env.initial_tenant, timeline_id, iterations=iterations) + wait_timeline_detail_404(ps_http, env.initial_tenant, timeline_id) tenant_path = env.pageserver.timeline_dir(tenant_id, timeline_id) assert not tenant_path.exists() diff --git a/test_runner/regress/test_timeline_detach_ancestor.py b/test_runner/regress/test_timeline_detach_ancestor.py index 9c7e851ba87b..2c3ee38baef1 100644 --- a/test_runner/regress/test_timeline_detach_ancestor.py +++ b/test_runner/regress/test_timeline_detach_ancestor.py @@ -203,7 +203,7 @@ def test_ancestor_detach_branched_from( ) client.timeline_delete(env.initial_tenant, env.initial_timeline) - wait_timeline_detail_404(client, env.initial_tenant, env.initial_timeline, 10, 1.0) + wait_timeline_detail_404(client, env.initial_tenant, env.initial_timeline) # because we do the fullbackup from ancestor at the branch_lsn, the zenith.signal is always different # as there is always "PREV_LSN: invalid" for "before" @@ -336,10 +336,10 @@ def test_ancestor_detach_reparents_earlier(neon_env_builder: NeonEnvBuilder): # delete the timelines to confirm detach actually worked client.timeline_delete(env.initial_tenant, after) - wait_timeline_detail_404(client, env.initial_tenant, after, 10, 1.0) + wait_timeline_detail_404(client, env.initial_tenant, after) client.timeline_delete(env.initial_tenant, env.initial_timeline) - wait_timeline_detail_404(client, env.initial_tenant, env.initial_timeline, 10, 1.0) + wait_timeline_detail_404(client, env.initial_tenant, env.initial_timeline) def test_detached_receives_flushes_while_being_detached(neon_env_builder: NeonEnvBuilder): @@ -973,17 +973,17 @@ def is_deleted(): with ThreadPoolExecutor(max_workers=2) as pool: try: fut = pool.submit(detach_ancestor) - offset = wait_until(10, 1.0, at_failpoint) + offset = wait_until(at_failpoint) delete = pool.submit(start_delete) - offset = wait_until(10, 1.0, lambda: at_waiting_on_gate_close(offset)) + offset = wait_until(lambda: at_waiting_on_gate_close(offset)) victim_http.configure_failpoints((pausepoint, 
"off")) delete.result() - assert wait_until(10, 1.0, is_deleted), f"unimplemented mode {mode}" + assert wait_until(is_deleted), f"unimplemented mode {mode}" # TODO: match the error with pytest.raises(PageserverApiException) as exc: @@ -1115,11 +1115,11 @@ def first_branch_gone(): with ThreadPoolExecutor(max_workers=1) as pool: try: fut = pool.submit(detach_timeline) - wait_until(10, 1.0, paused_at_failpoint) + wait_until(paused_at_failpoint) # let stuck complete stuck_http.configure_failpoints((pausepoint, "off")) - wait_until(10, 1.0, first_completed) + wait_until(first_completed) if mode == "delete_reparentable_timeline": assert first_branch is not None @@ -1127,7 +1127,7 @@ def first_branch_gone(): env.initial_tenant, first_branch ) victim_http.configure_failpoints((pausepoint, "off")) - wait_until(10, 1.0, first_branch_gone) + wait_until(first_branch_gone) elif mode == "create_reparentable_timeline": first_branch = create_reparentable_timeline() victim_http.configure_failpoints((pausepoint, "off")) @@ -1271,11 +1271,11 @@ def first_completed(): with ThreadPoolExecutor(max_workers=1) as pool: try: fut = pool.submit(detach_timeline) - wait_until(10, 1.0, paused_at_failpoint) + wait_until(paused_at_failpoint) # let stuck complete stuck_http.configure_failpoints((pausepoint, "off")) - wait_until(10, 1.0, first_completed) + wait_until(first_completed) victim_http.configure_failpoints((pausepoint, "off")) @@ -1456,7 +1456,7 @@ def try_detach(): # other tests take the "detach? reparent complete", but this only hits # "complete". http.timeline_delete(env.initial_tenant, env.initial_timeline) - wait_timeline_detail_404(http, env.initial_tenant, env.initial_timeline, 20) + wait_timeline_detail_404(http, env.initial_tenant, env.initial_timeline) http.configure_failpoints(("timeline-detach-ancestor::complete_before_uploading", "off")) @@ -1518,7 +1518,7 @@ def delete_detached(): with ThreadPoolExecutor(max_workers=1) as pool: detach = pool.submit(detach_and_get_stuck) - offset = wait_until(10, 1.0, request_processing_noted_in_log) + offset = wait_until(request_processing_noted_in_log) # make this named fn tor more clear failure test output logging def pausepoint_hit_with_gc_paused() -> LogCursor: @@ -1529,11 +1529,11 @@ def pausepoint_hit_with_gc_paused() -> LogCursor: ) return at - offset = wait_until(10, 1.0, pausepoint_hit_with_gc_paused) + offset = wait_until(pausepoint_hit_with_gc_paused) delete_detached() - wait_timeline_detail_404(http, env.initial_tenant, detached, 10, 1.0) + wait_timeline_detail_404(http, env.initial_tenant, detached) http.configure_failpoints((failpoint, "off")) diff --git a/test_runner/regress/test_timeline_gc_blocking.py b/test_runner/regress/test_timeline_gc_blocking.py index 5a5ca3290a07..7605e1f758b9 100644 --- a/test_runner/regress/test_timeline_gc_blocking.py +++ b/test_runner/regress/test_timeline_gc_blocking.py @@ -61,7 +61,7 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder, sharded: bool # deletion unblocks gc http.timeline_delete(env.initial_tenant, foo_branch) - wait_timeline_detail_404(http, env.initial_tenant, foo_branch, 10, 1.0) + wait_timeline_detail_404(http, env.initial_tenant, foo_branch) wait_for_another_gc_round() pss.assert_log_contains(gc_active_line) diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py index 4528bc618044..95bf9106cd94 100644 --- a/test_runner/regress/test_timeline_size.py +++ b/test_runner/regress/test_timeline_size.py @@ -396,11 +396,7 @@ def 
test_timeline_physical_size_init(neon_env_builder: NeonEnvBuilder): # Wait for the tenant to be loaded client = env.pageserver.http_client() - wait_until( - number_of_iterations=5, - interval=1, - func=lambda: assert_tenant_state(client, env.initial_tenant, "Active"), - ) + wait_until(lambda: assert_tenant_state(client, env.initial_tenant, "Active")) assert_physical_size_invariants( get_physical_size_values(env, env.initial_tenant, new_timeline_id), @@ -433,7 +429,7 @@ def check(): get_physical_size_values(env, env.initial_tenant, new_timeline_id), ) - wait_until(10, 1, check) + wait_until(check) def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder): @@ -721,7 +717,7 @@ def wait_for_tenant_startup_completions(client: PageserverHttpClient, count: int def condition(): assert client.get_metric_value("pageserver_tenant_startup_complete_total") == count - wait_until(5, 1.0, condition) + wait_until(condition) def test_ondemand_activation(neon_env_builder: NeonEnvBuilder): @@ -768,7 +764,7 @@ def at_least_one_active(): assert "Active" in set(get_tenant_states().values()) # One tenant should activate, then get stuck in their logical size calculation - wait_until(10, 1, at_least_one_active) + wait_until(at_least_one_active) # Wait some walltime to gain confidence that other tenants really are stuck and not proceeding to activate time.sleep(5) @@ -836,13 +832,13 @@ def at_least_one_active(): def all_active(): assert all(s == "Active" for s in get_tenant_states().values()) - wait_until(10, 1, all_active) + wait_until(all_active) # Final control check: restarting with no failpoints at all results in all tenants coming active # without being prompted by client I/O env.pageserver.stop() env.pageserver.start() - wait_until(10, 1, all_active) + wait_until(all_active) assert ( pageserver_http.get_metric_value("pageserver_tenant_startup_scheduled_total") == n_tenants @@ -856,7 +852,7 @@ def all_active(): extra_env_vars={"FAILPOINTS": "timeline-calculate-logical-size-pause=pause"} ) - wait_until(10, 1, at_least_one_active) + wait_until(at_least_one_active) detach_tenant_id = list( [(tid, s) for (tid, s) in get_tenant_states().items() if s == "Attaching"] @@ -881,7 +877,7 @@ def all_active(): # Check that all the stuck tenants proceed to active (apart from the one that deletes, and the one # we detached) - wait_until(10, 1, all_active) + wait_until(all_active) assert len(get_tenant_states()) == n_tenants - 2 @@ -908,7 +904,7 @@ def delete_tenant(): try: # Deletion will get to the point in shutdown where it's waiting for timeline shutdown, then # hang because of our failpoint blocking activation. 
- wait_until(10, 1, shutting_down) + wait_until(shutting_down) finally: log.info("Clearing failpoint") pageserver_http.configure_failpoints(("timeline-calculate-logical-size-pause", "off")) @@ -1030,13 +1026,13 @@ def one_is_active(): log.info(f"{states}") assert len(states["Active"]) == 1 - wait_until(10, 1, one_is_active) + wait_until(one_is_active) def other_is_attaching(): states = get_tenant_states() assert len(states["Attaching"]) == 1 - wait_until(10, 1, other_is_attaching) + wait_until(other_is_attaching) def eager_tenant_is_active(): resp = client.tenant_status(eager_tenant) @@ -1053,7 +1049,7 @@ def eager_tenant_is_active(): }, lazy=False, ) - wait_until(10, 1, eager_tenant_is_active) + wait_until(eager_tenant_is_active) other_is_attaching() @@ -1096,7 +1092,7 @@ def initial_tenant_is_active(): resp = client.tenant_status(env.initial_tenant) assert resp["state"]["slug"] == "Active" - wait_until(10, 1, initial_tenant_is_active) + wait_until(initial_tenant_is_active) # even though the initial tenant is now active, because it was startup time # attach, it will consume the only permit because logical size calculation @@ -1119,7 +1115,7 @@ def lazy_tenant_is_attaching(): assert resp["state"]["slug"] == "Attaching" # paused logical size calculation of env.initial_tenant is keeping it attaching - wait_until(10, 1, lazy_tenant_is_attaching) + wait_until(lazy_tenant_is_attaching) for _ in range(5): lazy_tenant_is_attaching() @@ -1132,10 +1128,10 @@ def lazy_tenant_is_active(): if activation_method == "endpoint": with env.endpoints.create_start("main", tenant_id=lazy_tenant): # starting up the endpoint should make it jump the queue - wait_until(10, 1, lazy_tenant_is_active) + wait_until(lazy_tenant_is_active) elif activation_method == "branch": env.create_timeline("second_branch", lazy_tenant) - wait_until(10, 1, lazy_tenant_is_active) + wait_until(lazy_tenant_is_active) elif activation_method == "delete": delete_lazy_activating(lazy_tenant, env.pageserver, expect_attaching=True) else: diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 8fa33b81a9dc..23d4f23cdb84 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -2136,7 +2136,7 @@ def evicted_on_source(): # Check that on source no segment files are present assert src_sk.list_segments(tenant_id, timeline_id) == [] - wait_until(60, 1, evicted_on_source) + wait_until(evicted_on_source, timeout=60) # Invoke pull_timeline: source should serve snapshot request without promoting anything to local disk, # destination should import the control file only & go into evicted mode immediately @@ -2155,7 +2155,7 @@ def evicted_on_destination(): # This should be fast; it is a wait_until because eviction state is updated # in the background wrt pull_timeline. 
- wait_until(10, 0.1, evicted_on_destination) + wait_until(evicted_on_destination, timeout=1.0, interval=0.1) # Delete the timeline on the source, to prove that deletion works on an # evicted timeline _and_ that the final compute test is really not using @@ -2178,7 +2178,7 @@ def unevicted_on_dest(): n_evicted = dst_sk.http_client().get_metric_value("safekeeper_evicted_timelines") assert n_evicted == 0 - wait_until(10, 1, unevicted_on_dest) + wait_until(unevicted_on_dest, interval=0.1, timeout=1.0) # In this test we check for excessive START_REPLICATION and START_WAL_PUSH queries @@ -2606,10 +2606,10 @@ def all_evicted(): assert n_evicted # make mypy happy assert int(n_evicted) == n_timelines - wait_until(60, 0.5, all_evicted) + wait_until(all_evicted, timeout=30) # restart should preserve the metric value sk.stop().start() - wait_until(60, 0.5, all_evicted) + wait_until(all_evicted) # and endpoint start should reduce it endpoints[0].start() @@ -2618,7 +2618,7 @@ def one_unevicted(): assert n_evicted # make mypy happy assert int(n_evicted) < n_timelines - wait_until(60, 0.5, one_unevicted) + wait_until(one_unevicted) # Test resetting uploaded partial segment state. @@ -2666,7 +2666,7 @@ def evicted(): if isinstance(eviction_state, str) and eviction_state == "Present": raise Exception("eviction didn't happen yet") - wait_until(30, 1, evicted) + wait_until(evicted) # it must have uploaded something uploaded_segs = sk.list_uploaded_segments(tenant_id, timeline_id) log.info(f"uploaded segments before reset: {uploaded_segs}") @@ -2763,7 +2763,7 @@ def source_partial_segment_uploaded(): raise Exception("Partial segment not uploaded yet") - source_partial_segment = wait_until(15, 1, source_partial_segment_uploaded) + source_partial_segment = wait_until(source_partial_segment_uploaded) log.info( f"Uploaded segments before pull are {src_sk.list_uploaded_segments(tenant_id, timeline_id)}" ) @@ -2787,7 +2787,7 @@ def evicted(): if evictions is None or evictions == 0: raise Exception("Eviction did not happen on source safekeeper yet") - wait_until(30, 1, evicted) + wait_until(evicted) endpoint.start(safekeepers=[2, 3]) @@ -2804,7 +2804,7 @@ def new_partial_segment_uploaded(): ) endpoint.safe_psql("insert into t select generate_series(1, 1000), 'pear'") - wait_until(15, 1, new_partial_segment_uploaded) + wait_until(new_partial_segment_uploaded) log.info( f"Uploaded segments after post-pull ingest are {src_sk.list_uploaded_segments(tenant_id, timeline_id)}" ) @@ -2833,4 +2833,4 @@ def unevicted(): if unevictions is None or unevictions == 0: raise Exception("Uneviction did not happen on source safekeeper yet") - wait_until(10, 1, unevicted) + wait_until(unevicted) diff --git a/test_runner/regress/test_wal_receiver.py b/test_runner/regress/test_wal_receiver.py index 294f86ffa720..d22a900c5923 100644 --- a/test_runner/regress/test_wal_receiver.py +++ b/test_runner/regress/test_wal_receiver.py @@ -97,7 +97,7 @@ def all_sks_in_wareceiver_state(): str(safekeeper.id) in exception_string ), f"Should have safekeeper {safekeeper.id} printed in walreceiver state after WAL wait timeout" - wait_until(60, 0.5, all_sks_in_wareceiver_state) + wait_until(all_sks_in_wareceiver_state, timeout=30) stopped_safekeeper = env.safekeepers[-1] stopped_safekeeper_id = stopped_safekeeper.id @@ -124,7 +124,7 @@ def all_but_stopped_sks_in_wareceiver_state(): str(safekeeper.id) in exception_string ), f"Should have safekeeper {safekeeper.id} printed in walreceiver state after 2nd WAL wait timeout" - wait_until(60, 0.5, 
all_but_stopped_sks_in_wareceiver_state) + wait_until(all_but_stopped_sks_in_wareceiver_state, timeout=30) def insert_test_elements(env: NeonEnv, tenant_id: TenantId, start: int, count: int): diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index c0a3abc3774a..d19379aefdf6 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -55,12 +55,16 @@ log = { version = "0.4", default-features = false, features = ["std"] } memchr = { version = "2" } nix = { version = "0.26" } nom = { version = "7" } +num = { version = "0.4" } num-bigint = { version = "0.4" } +num-complex = { version = "0.4", default-features = false, features = ["std"] } num-integer = { version = "0.1", features = ["i128"] } +num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] } +num-rational = { version = "0.4", default-features = false, features = ["num-bigint-std", "std"] } num-traits = { version = "0.2", features = ["i128", "libm"] } once_cell = { version = "1" } parquet = { version = "53", default-features = false, features = ["zstd"] } -prost = { version = "0.13", features = ["prost-derive"] } +prost = { version = "0.13", features = ["no-recursion-limit", "prost-derive"] } rand = { version = "0.8", features = ["small_rng"] } regex = { version = "1" } regex-automata = { version = "0.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } @@ -76,7 +80,8 @@ smallvec = { version = "1", default-features = false, features = ["const_new", " spki = { version = "0.7", default-features = false, features = ["pem", "std"] } subtle = { version = "2" } sync_wrapper = { version = "0.1", default-features = false, features = ["futures"] } -tikv-jemalloc-sys = { version = "0.6", features = ["stats"] } +tikv-jemalloc-ctl = { version = "0.6", features = ["stats", "use_std"] } +tikv-jemalloc-sys = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] } time = { version = "0.3", features = ["macros", "serde-well-known"] } tokio = { version = "1", features = ["full", "test-util"] } tokio-rustls = { version = "0.26", default-features = false, features = ["logging", "ring", "tls12"] } @@ -111,14 +116,18 @@ libc = { version = "0.2", features = ["extra_traits", "use_std"] } log = { version = "0.4", default-features = false, features = ["std"] } memchr = { version = "2" } nom = { version = "7" } +num = { version = "0.4" } num-bigint = { version = "0.4" } +num-complex = { version = "0.4", default-features = false, features = ["std"] } num-integer = { version = "0.1", features = ["i128"] } +num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] } +num-rational = { version = "0.4", default-features = false, features = ["num-bigint-std", "std"] } num-traits = { version = "0.2", features = ["i128", "libm"] } once_cell = { version = "1" } parquet = { version = "53", default-features = false, features = ["zstd"] } prettyplease = { version = "0.2", default-features = false, features = ["verbatim"] } proc-macro2 = { version = "1" } -prost = { version = "0.13", features = ["prost-derive"] } +prost = { version = "0.13", features = ["no-recursion-limit", "prost-derive"] } quote = { version = "1" } regex = { version = "1" } regex-automata = { version = "0.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] }
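The test-runner hunks above all migrate call sites from the positional `wait_until(iterations, interval, func)` form to a keyword-based `wait_until(func, timeout=..., interval=...)`. The helper's new definition is not part of this diff; the sketch below is a minimal reading of it, and the `timeout=20.0` / `interval=0.5` defaults are assumptions rather than the fixtures' actual values:

```python
import time
from typing import Callable, Optional, TypeVar

T = TypeVar("T")


def wait_until(
    func: Callable[[], T],
    timeout: float = 20.0,  # assumed default, not taken from this diff
    interval: float = 0.5,  # assumed default, not taken from this diff
) -> T:
    """Minimal sketch: retry func until it stops raising, within a wall-clock budget."""
    deadline = time.monotonic() + timeout
    last_exc: Optional[Exception] = None
    while time.monotonic() <= deadline:
        try:
            # Return func's value on success, so call sites like
            # `offset = wait_until(request_processing_noted_in_log)` still work.
            return func()
        except Exception as e:
            last_exc = e
            time.sleep(interval)
    raise TimeoutError(f"timed out after {timeout}s") from last_exc
```

Under this reading, a call site that keeps an explicit budget typically preserves the old `iterations * interval` product: `wait_until(60, 1, evicted_on_source)` becomes `wait_until(evicted_on_source, timeout=60)`, and `wait_until(60, 0.5, all_sks_in_wareceiver_state)` becomes `wait_until(all_sks_in_wareceiver_state, timeout=30)`. The common `wait_until(10, 1, f)` shape simply drops both arguments and falls back to the defaults, while a few sites, such as `unevicted_on_dest`, deliberately tighten the budget instead.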
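`wait_timeline_detail_404` loses its trailing count/interval arguments at every call site in the same way, which suggests it is now a thin wrapper over the new `wait_until`. The sketch below is illustrative only: the import paths and the `status_code` attribute on `PageserverApiException` are assumptions inferred from identifiers seen elsewhere in this diff, not the fixtures' real definition:

```python
# Assumed fixture imports; the real module paths may differ.
from fixtures.common_types import TenantId, TimelineId
from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient


def wait_timeline_detail_404(
    pageserver_http: PageserverHttpClient,
    tenant_id: TenantId,
    timeline_id: TimelineId,
) -> None:
    def timeline_is_missing() -> None:
        try:
            data = pageserver_http.timeline_detail(tenant_id, timeline_id)
        except PageserverApiException as e:
            if e.status_code == 404:
                return  # timeline is gone, as expected
            raise
        raise RuntimeError(f"timeline still present: {data}")

    # Polls with the helper's default timeout/interval, matching the call
    # sites above that dropped their explicit `10, 1.0` arguments.
    wait_until(timeline_is_missing)
```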