Merge commit '9e07d7966ae1964c3f2e1db3b659da6fdbbc01c4' into pg15-master-merge

Merge YB master commit 9e07d79 titled [#21058] DocDB: Revert "[#14165] DocDB: Rollback only the newest transaction in a deadlock" and committed 2024-02-15T17:28:56-06:00 into YB pg15. YB pg15 initial merge is 55782d5. - jenkins_jobs.yml: YB master 9f91eeb changes clang16 to clang17. pg15 branch has multiple modifications to this file. Apply both. - yb_uniqkeys.c: yb_is_const_clause_for_distinct_pushdown: YB master 9161aec and YB pg15 40e68e2 touch the same code. Take pg15's version, as suggested by Patnaik. - slot.c: ReplicationSlotCreate: YB master aa3528e adds extra parameter CRSSnapshotAction yb_snapshot_action; upstream PG 19890a064ebf53dedcefed0d8339ed3d449b06e6 adds extra parameter two_phase. Apply both. Maybe two_phase should be passed down to YBCCreateReplicationSlot. - slotfuncs.c: upstream PG 9f06d79ef831ffa333f908f6d3debdb654292414 moves two ReplicationSlotCreate calls into create_physical_replication_slot and create_logical_replication_slot. YB master aa3528e passes CRS_NOEXPORT_SNAPSHOT to ReplicationSlotCreate. Move that and any comments. Note that create_logical_replication_slot and create_physical_replication_slot are also additionally called by copy_replication_slot, and hopefully CRS_NOEXPORT_SNAPSHOT makes sense for that case. - walsender.c: CreateReplicationSlot: (same as slot.c) - elog.c: - function declarations: YB pg15 85f8a82 moves declarations up, so apply the changes of YB master 2d0bd35 there. - yb_write_status_to_server_log: YB pg15 85f8a82 adds call of yb_message_from_status_data that gets renamed to yb_format_and_append by YB master 2d0bd35. - elog_start, elog_finish: upstream PG 17a28b03645e27d73bf69a95d7569b61e58f06eb deletes these functions, so drop changes to elog_finish from YB master 2d0bd35. - EVALUATE_MESSAGE: YB master 2d0bd35 deletes yb_debug_report_error_stacktrace if condition whereas upstream PG d6c55de1f99a9028540516316b95321a7b12a540 removes pfree(fmtbuf). Do both deletions. 
- Check that YB_EVALUATE_MESSAGE_FROM_STATUS is up-to-date with EVALUATE_MESSAGE: it is now closer due to PG d6c55de1f99a9028540516316b95321a7b12a540 removing formatting code that was never present in YB_EVALUATE_MESSAGE_FROM_STATUS. Update the comment for YB_EVALUATE_MESSAGE_FROM_STATUS accordingly. - Check that yb_additional_errmsg is called wherever needed as explained in the comment. The only new candidate function between PG 11.2 and PG 15.2 is errhint_plural, and that should not get yb_additional_errmsg since it's a hint. - pgbench.c: - includes: upstream PG dddf4cdc3300073ec04b2c3e482a4c1fa4b8191b moves pgbench.h include higher. Adjacent conflict with YB master 6a009b1 adding ysql_bench_metrics_handler.h include. - variable declarations: upstream PG 0e39a608ed5545cc6b9d538ac937c3c1ee8cdc36 adds pg_time_usec_t epoch_shift in the same location YB master 6a009b1 adds YsqlBenchMetricEntry *ysql_bench_metric_entry = NULL. Take both. - usage: YB pg15 initial merge moves --batch-size, introduced by YB master 62fd877, down into "Common options". This is an adjacent conflict with YB master 6a009b1 adding "Prometheus metrics options". - main: - old YB master 62fd877 adds batch-size, old YB master af25ba5 adds max-tries, incoming YB master 6a009b1 adds yb-metrics-bind-address and yb-metrics-bind-port. YB pg15 initial merge preserves max-tries but deletes batch-size (this might be because max-tries was an import that originated from upstream PG while batch-size was not). - It also handles the max-tries case properly in getopt_long while loop but omits the batch-size case. Ignore batch-size for now and try to faithfully preserve the yb-metrics-* options. This requires renumbering the cases. - Upstream PG 547f04e7348b6ed992bd4a197d39661fe7c25097 changes a line to "conn_total_duration = 0" where YB master 6a009b1 adjacently adds "Start metrics webserver". Apply both. 
- threadRun: YB master 6a009b1 adds "if (ysql_bench_metric_entry)", but upstream PG 9f75e3772350fb66f20a3d7f33bc94f30300d7eb moves all that code into a new function printProgressReport. Move the changes there. It is nontrivial to find where it belongs: after "fprintf(stderr," because that is the original line that was changed by old YB master af25ba5. It appears the changes of that old YB master af25ba5 have not yet been translated to the new location printProgressReport, but ignore this issue for now. - libpq-be.h: YB master 1b784fe adds yb_is_ssl_enabled_in_logical_conn while upstream PG adds authn_id in the same place. Apply both. - slot.h: (same as slot.c) - pg15_tests/test_D31368.sh, pg15_tests/passing_tests.tsv: thanks to YB master e62fdc8, TestPgRegressProc now passes, so move it to passing_tests.tsv.
yugabyte · Apr 9, 2024 · f87edee · f87edee
2 parents 71ebe84 + 9e07d79
commit f87edee
Show file tree

Hide file tree

Showing 666 changed files with 15,239 additions and 4,578 deletions.
diff --git a/.gitignore b/.gitignore
@@ -54,6 +54,7 @@ core.*
 
 # Temporary files
 .DS_Store
+.DS_Store.out
 .\#*.proto
 
 # C# Object files
@@ -115,6 +116,7 @@ submodules/
 !python/yugabyte/test_data/org_yb_pgsql_TestDropTableWithConcurrentTxn_testDmlTxnDrop_1pct_sample.log
 
 managed/yba-installer/bin/yba-ctl
+managed/.devspace
 managed/yba-installer/yba-ctl
 managed/devops/pex/pexEnv/
 managed/src/main/java/com/yugabyte/yw/common/operator/io/*

diff --git a/...gn/pessimistic-locking-functional-spec.md → ...esign/wait-on-conflict-functional-spec.md b/...gn/pessimistic-locking-functional-spec.md → ...esign/wait-on-conflict-functional-spec.md
@@ -1,13 +1,13 @@
-## YSQL Pessimistic Locking
+## Wait-on-Conflict Concurrency Control in YSQL
 
 
 ## 1 Introduction
 
-To understand pessimistic locking behaviour in PostgreSQL, it is important to take note of some points about row-level locking and its interaction with DMLs.
-
-### PostgreSQL supports 4 types of row-level locks -
+This document defines fail-on-conflict concurrency control behavior in YSQL and wait-on-conflict behavior in PostgreSQL, and outlines the requirements for changing YSQL's default concurrency-control semantics.
 
+To understand wait-on-conflict concurrency control in PostgreSQL, it is important to take note of some points about row-level locking and its interaction with DMLs.
 
+### PostgreSQL supports 4 types of row-level locks -
 
 1. Exclusive modes (i.e., only 1 transaction can hold such a lock at any time) -
     1. `FOR UPDATE` - takes exclusive lock on the whole row (blocks the shared locks as well)
@@ -24,7 +24,7 @@ To understand pessimistic locking behaviour in PostgreSQL, it is important to ta
     3. `FOR SHARE`: doesn’t allow any concurrent modification to the locked tuple.
     4. `FOR KEY SHARE`: allow only modification to non-primary key cols
 
-Lock-modification conflict only happens when the period from lock acquire to lock release intersects/ overlaps with the read to commit time of the transaction performing the modification.
+Lock-modification conflict only happens when the period from lock acquire to lock release intersects/overlaps with the read to commit time of the transaction performing the modification.
 
 Lock-modification conflicts result in serialization errors. Note that the lock might be issued before or after the modification (see example 2i and 2ii).
 
@@ -44,22 +44,20 @@ In other words, a DML operation consists of _locking_ and _modification_ of a tu
 
 Given that writing = locking + modification, the definition of lock-modification conflict is more generic to think of for serialization errors (the usual write-write “conflict” we talk about is just a special case of this. A write-write conflict = implicit lock - modification conflict).
 
-### Conflicts in YSQL -
-
-YSQL does not differentiate between a lock-lock conflict and a lock-modification conflict. In either case, we will trigger conflict resolution. Conflict resolution in YSQL doesn’t lead to any blocking, but would surely abort either itself or all conflicting transactions based on priorities of the transactions (which are randomly chosen). This is _optimistic locking_.
+### Fail-on-Conflict in YSQL -
 
-Moreover, unlike PostgreSQL’s Serializable isolation, in YSQL’s Serializable isolation, plain SELECTs without explicit lock also implicitly take _FOR SHARE_ lock. And this can result in a plain read DML to conflict with a write DML (which will result in triggering conflict resolution).
+YSQL does not differentiate between a lock-lock conflict and a lock-modification conflict. In either case, such conflicts will be detected during conflict resolution. Conflict resolution in YSQL does not lead to any blocking; instead, it aborts either the transaction that encountered the conflict or all of the conflicting transactions, based on the priorities of the transactions (which are randomly chosen). We call this behavior _fail-on-conflict concurrency control_.
 
-### Pessimistic Locking in PostgreSQL -
+### Wait-on-Conflict in PostgreSQL -
 
-PostgreSQL uses **pessimistic locking**, where a transaction will **wait upon encountering a lock-lock conflict** in order to acquire the locks it needs and proceed.
+PostgreSQL uses **wait-on-conflict concurrency control**, where a transaction will **wait upon encountering a lock-lock conflict** in order to acquire the locks it needs and proceed.
 
-**Pessimistic locking (In PostgreSQL):** When a transaction T1 attempts to take a lock (either implicitly via a write or explicitly using `SELECT FOR`), it might be blocked by transactions that hold a conflicting lock (note again, either implicitly via a write or explicitly using `SELECT FOR`). The transaction will then wait for all lock-lock conflicting transactions to end before obtaining the lock. Once the other transactions end, either the lock will be taken or a **lock-modification** conflict will be detected and a serialization error will occur.
+**Wait-on-conflict (In PostgreSQL):** When a transaction T1 attempts to take a lock (either implicitly via a write or explicitly using `SELECT FOR`), it might be blocked by transactions that hold a conflicting lock (note again, either implicitly via a write or explicitly using `SELECT FOR`). The transaction will then wait for all lock-lock conflicting transactions to end before obtaining the lock. Once the other transactions end, either the lock will be taken or a **lock-modification** conflict will be detected and a serialization error will occur.
 
 Some points to note are -
 
-1. **Transaction might skip being added to the queue:** If a transaction is trying to acquire a lock that doesn’t conflict with actively held locks but does conflict with a transaction that is already waiting to acquire a conflicting lock, it proceeds to take the lock. In short, the transaction skips being added to the queue itself (see example 3). This could also lead to starvation.
-2. **Transaction already in queue might jump older transactions in the queue: **Multiple transactions might be blocked to lock (either implicitly via write or explicitly) the same tuple and this results in a queue. When an active transaction ends, transactions that entered the queue earlier are checked first for unblocking. Transactions that don’t conflict with the active transactions are unblocked to acquire locks and become active. Note that this implies that a transaction that conflicts with earlier transactions blocked in the queue might still be unblocked if it doesn’t conflict with active transactions (which could lead to starvation).
+1. **Transaction might skip being added to the queue:** If a transaction is trying to acquire a lock that doesn’t conflict with actively held locks but does conflict with a transaction that is already waiting to acquire a conflicting lock, it proceeds to take the lock. In short, the transaction skips being added to the queue itself (see example 3). This improves availability at the potential cost of starvation.
+2. **Transaction already in queue might jump older transactions in the queue:** Multiple transactions might be blocked to lock the same tuple and this results in a queue. When an active transaction ends, we check other transactions in the wait queue which the resolved transaction was blocking. Transactions which no longer conflict with other active transactions are unblocked to acquire locks and become active. Note that this implies that a transaction that conflicts with earlier transactions blocked in the queue might still be unblocked if it doesn’t conflict with active transactions. This again improves availability at the potential cost of starvation, and matches the behavior in PostgreSQL.
 3. In case <code>[statement_timeout or lock_timeout](https://www.postgresql.org/docs/13/runtime-config-client.html)</code> is non-zero, a blocked transaction will abort after the timeout. (example 4). A zero timeout would imply indefinite waiting.
 
 NOTE: Refer this [article](https://postgrespro.com/blog/pgsql/5968005) for a good overview of row-level locking.
@@ -68,23 +66,23 @@ NOTE: Refer this [article](https://postgrespro.com/blog/pgsql/5968005) for a goo
 ## 2 Requirements
 
 1. Firstly, support lock-lock type of conflict that only blocks, in YSQL. This is to split the current notion of conflict in YSQL into the two finer notions that Postgres supports.
-2. In case a transaction T1 tries to acquire a lock as part of a implicit/explicit lock acquisition that conflicts with existing locks held by active transactions -
-    1. It has to wait for all transactions that have conflicting locks to end (waiting behaviour)
-    2. Throw a serialization error only if a modification that conflicts with the lock has occurred.
-    3. Ability to timeout and remove self from queue
-3. A [distributed deadlock detection algorithm](https://docs.google.com/document/d/1E4LHGmVZuTlr36_uczPuE6aAjoLT4sfr1o0YCtzUUPc/edit#) to break cycles. The following properties are required -
+2. In case a transaction T1 issues an RPC which tries to acquire a lock as part of a DML that conflicts with existing locks held by active transactions, the RPC should:
+    1. wait for all transactions that have conflicting locks to end (waiting behaviour)
+    2. throw a serialization error only if a modification that conflicts with the lock has committed
+    3. respect client-specified timeouts and remove itself from the queue
+3. Implement a [distributed deadlock detection algorithm](https://docs.google.com/document/d/1E4LHGmVZuTlr36_uczPuE6aAjoLT4sfr1o0YCtzUUPc/edit#) to break cycles. The following properties are required -
     1. No false positives
     2. Bound on latency = O (cycle size)
 4. Add a metric to measure the “intensity” of starvation by measuring the number of instances where a transaction jumps the wait queue ahead of other waiting transactions that it conflicts with just because it doesn’t conflict with active transactions.
 
-Once pessimistic locking is supported, YSQL will have the same behaviour as Postgres with regards to transaction waiting behaviour in case of writing/locking, with **two exceptions**:
+Once wait-on-conflict concurrency control is supported, YSQL will have the same behaviour as Postgres with regard to transaction waiting behaviour in case of writing/locking, with **two exceptions**:
 
 1. For Serializable isolation level, there will be an extra YSQL-only behaviour of waiting in case a read (or write) intent is to be written that conflicts with an already existing write (or read) intent from another transaction. In PostgreSQL’s Serializable isolation implementation (i.e., SSI), reads don’t take implicit locks and hence don’t “lock” conflict with other writes. And hence, a read doesn’t wait on another write (and vice versa) in PostgreSQL’s Serializable isolation.
 2. YSQL writes fine grained column level intents in case of modifications to specific columns only. This will allow us to be more fine grained so that modifications to different columns of a row need not result in waiting (possibly followed by a serialization error). This is one aspect in which YSQL would turn out to be **better than** PostgreSQL - and semantically different in a hopefully beneficial way.
 
 ## 3 Usage
 
-**yb_use_pessimistic_locking**: a cluster level gflag to turn on pessimistic locking. This will require a cluster restart. Note that all transactions in the cluster will either use pessimistic locking or optimistic locking.
+**enable_wait_queues**: a cluster-level gflag to turn on wait-on-conflict behavior. This will require a cluster restart. Note that all transactions in the cluster will use either wait-on-conflict or fail-on-conflict behavior; mixed behavior is tolerated during restart/migration but is otherwise generally not supported.
 
 ### Expected Behavior
 
@@ -613,15 +611,13 @@ START TRANSACTION</code>
   </tr>
 </table>
 
-2. **READ COMMITTED isolation level** (exactly as in PostgreSQL) has a dependency on pessimistic locking. Note that in general pessimistic locking is orthogonal to isolation levels but READ COMMITTED specifically has a “dependency” on pessimistic locking. To be precise, on facing a conflict, a transaction has to wait for the conflicting transaction to rollback/commit. On commit, the transaction will fetch the latest version of the row and work on that.
+2. **READ COMMITTED isolation level** (exactly as in PostgreSQL) has a dependency on wait-on-conflict concurrency control. To be precise, on facing a conflict, a transaction has to wait for the conflicting transaction to rollback/commit before retrying the statement. Once unblocked, the read committed session will operate on the newly-committed data when retrying the conflicting statement. If READ COMMITTED is used without wait-on-conflict, we will use an internal retry mechanism which differs slightly from PostgreSQL.
 
 ### Versioning and upgrades
 
-This feature is upgrade and downgrade safe. When turning the gflag on/off, or during rolling restarts across versions with the flag “on” in the higher version, if some nodes have pessimistic locking on and some don’t, users will experience mixed (but still correct) behavior. A mix of both optimistic locking and pessimistic locking will result in the following additional YSQL specific semantics -
+This feature is upgrade and downgrade safe. When turning the gflag on/off, or during rolling restarts across versions with the flag “on” in the higher version, if some nodes have wait-on-conflict behavior enabled and some don’t, users will experience mixed (but still correct) behavior. A mix of both fail-on-conflict and wait-on-conflict traffic will result in the following additional YSQL specific semantics -
 
-1. Assume T1 follows optimistic locking and T2 follows pessimistic locking
-2. If a transaction T1 uses optimistic locking and sees transactions that have written conflicting intents -
-    1. if there is even 1 conflicting transaction that follows pessimistic locking, abort T1 
-    2. Else, behaviour is as today
-3. If a transaction T1 uses pessimistic locking and sees transactions that have written conflicting intents -
-    1. Wait for all conflicting transactions to end
+1. If a transaction using fail-on-conflict sees transactions that have written conflicting intents -
+    1. Behaviour is as today
+2. If a transaction uses wait-on-conflict and sees transactions that have written conflicting intents -
+    1. Wait for all conflicting transactions to end (including any using fail-on-conflict semantics)
diff --git a/build-support/common-build-env-test.sh b/build-support/common-build-env-test.sh
@@ -182,10 +182,10 @@ test_set_cmake_build_type_and_compiler_type   release    darwin    clang      re
 test_set_cmake_build_type_and_compiler_type   release    linux-gnu clang      release    clang   0
 test_set_cmake_build_type_and_compiler_type   release    linux-gnu gcc        release    gcc     0
 test_set_cmake_build_type_and_compiler_type   release    linux-gnu gcc11      release    gcc11   0
-test_set_cmake_build_type_and_compiler_type   debug      linux-gnu auto       debug      clang16 0
-test_set_cmake_build_type_and_compiler_type   FaStDeBuG  linux-gnu auto       fastdebug  clang16 0
-test_set_cmake_build_type_and_compiler_type   release    linux-gnu auto       release    clang16 0
-test_set_cmake_build_type_and_compiler_type   tsan       linux-gnu auto       fastdebug  clang16 0
+test_set_cmake_build_type_and_compiler_type   debug      linux-gnu auto       debug      clang17 0
+test_set_cmake_build_type_and_compiler_type   FaStDeBuG  linux-gnu auto       fastdebug  clang17 0
+test_set_cmake_build_type_and_compiler_type   release    linux-gnu auto       release    clang17 0
+test_set_cmake_build_type_and_compiler_type   tsan       linux-gnu auto       fastdebug  clang17 0
 test_set_cmake_build_type_and_compiler_type   asan       linux-gnu auto       fastdebug  clang16 0
 
 # -------------------------------------------------------------------------------------------------

diff --git a/build-support/common-build-env.sh b/build-support/common-build-env.sh
@@ -550,8 +550,11 @@ set_default_compiler_type() {
       YB_COMPILER_TYPE=clang
       adjust_compiler_type_on_mac
     elif [[ $OSTYPE =~ ^linux ]]; then
-      detect_architecture
-      YB_COMPILER_TYPE=clang16
+      if [[ ${build_type} == "asan" ]]; then
+        YB_COMPILER_TYPE=clang16
+      else
+        YB_COMPILER_TYPE=clang17
+      fi
     else
       fatal "Cannot set default compiler type on OS $OSTYPE"
     fi

diff --git a/build-support/generate_test_certificates.sh b/build-support/generate_test_certificates.sh
@@ -195,47 +195,64 @@ EOT
                        -nocrypt
 }
 
-temp_dir="$(mktemp -d)"
-
-mkdir -p "$temp_dir/CA1" "$temp_dir/CA2" "$temp_dir/named" "$out_dir"
-
-generate_ca "$temp_dir/CA1" 'YugabyteDB CA 1'
-for i in $(seq 2 2 254); do
-  generate_node_cert "$temp_dir/CA1" "$i"
-done
-generate_ysql_cert "$temp_dir/CA1" ysql
-
-cp "$temp_dir/CA1/ca.crt" \
-   "$temp_dir/CA1/node."*".crt" \
-   "$temp_dir/CA1/node."*".key" \
-   "$temp_dir/CA1/ysql.crt" \
-   "$temp_dir/CA1/ysql.key" \
-   "$temp_dir/CA1/ysql.key.der" \
-   "$out_dir/"
-
-generate_ca "$temp_dir/CA2" 'YugabyteDB CA 2'
-for i in $(seq 2 2 254); do
-  generate_node_cert "$temp_dir/CA2" "$i"
-done
-
-cat "$temp_dir/CA2/ca.crt" "$temp_dir/CA1/ca.crt" > "$temp_dir/combinedCA.crt"
-
-mkdir -p "$out_dir/CA2"
-cp "$temp_dir/CA2/ca.crt" \
-   "$temp_dir/CA2/node."*".crt" \
-   "$temp_dir/CA2/node."*".key" \
-   "$temp_dir/combinedCA.crt" \
-   "$out_dir/CA2"
-
-generate_ca "$temp_dir/named" 'YugabyteDB CA'
-for i in 2 4 6 52 ; do
-  generate_node_named_cert "$temp_dir/named" $i
-done
-
-mkdir -p "$out_dir/named"
-cp "$temp_dir/named/ca.crt" \
-   "$temp_dir/named/node."*".crt" \
-   "$temp_dir/named/node."*".key" \
-   "$out_dir/named"
-
-rm -rf "$temp_dir"
+generate_test_certificates() {
+  local out_dir="$1"
+
+  set -euo pipefail
+
+  temp_dir="$(mktemp -d)"
+  mkdir -p "$temp_dir/CA1" "$temp_dir/CA2" "$temp_dir/named" "$out_dir"
+
+  generate_ca "$temp_dir/CA1" 'YugabyteDB CA 1'
+  for i in $(seq 2 2 254); do
+    generate_node_cert "$temp_dir/CA1" "$i"
+  done
+  generate_ysql_cert "$temp_dir/CA1" ysql
+
+  cp "$temp_dir/CA1/ca.crt" \
+     "$temp_dir/CA1/node."*".crt" \
+     "$temp_dir/CA1/node."*".key" \
+     "$temp_dir/CA1/ysql.crt" \
+     "$temp_dir/CA1/ysql.key" \
+     "$temp_dir/CA1/ysql.key.der" \
+     "$out_dir/"
+
+  generate_ca "$temp_dir/CA2" 'YugabyteDB CA 2'
+  for i in $(seq 2 2 254); do
+    generate_node_cert "$temp_dir/CA2" "$i"
+  done
+
+  cat "$temp_dir/CA2/ca.crt" "$temp_dir/CA1/ca.crt" > "$temp_dir/combinedCA.crt"
+
+  mkdir -p "$out_dir/CA2"
+  cp "$temp_dir/CA2/ca.crt" \
+     "$temp_dir/CA2/node."*".crt" \
+     "$temp_dir/CA2/node."*".key" \
+     "$temp_dir/combinedCA.crt" \
+     "$out_dir/CA2"
+
+  generate_ca "$temp_dir/named" 'YugabyteDB CA'
+  for i in 2 4 6 52 ; do
+    generate_node_named_cert "$temp_dir/named" $i
+  done
+
+  mkdir -p "$out_dir/named"
+  cp "$temp_dir/named/ca.crt" \
+     "$temp_dir/named/node."*".crt" \
+     "$temp_dir/named/node."*".key" \
+     "$out_dir/named"
+
+  rm -rf "$temp_dir"
+}
+
+report_error() {
+  local error=$?
+  local output="$1"
+  echo >&2 "Failed to generate test certificates. Command output:"
+  echo >&2
+  echo >&2 "$output"
+  exit $error
+}
+trap 'report_error "$out"' ERR
+out=$(generate_test_certificates "$out_dir" 2>&1)
+trap - ERR