From 7020d08953e8e140d424097eb86ec6bf26286e9d Mon Sep 17 00:00:00 2001 From: Robert Pang Date: Sat, 14 Apr 2018 23:48:07 -0700 Subject: [PATCH] ENG-3189: Fix intermittent "transaction expired" error when committing a distributed transaction Summary: In the transaction coordinator, the last_touch_ time of a transaction may be skewed compared to the coordinator's clock. Sometimes the last_touch_ time may be ahead of the coordinator's clock, which causes an underflow when the last_touch_ time is subtracted from now() in TransactionState::ExpiredAt(), and the transaction is erroneously considered expired. Test Plan: Run CassandraTransactionalKeyValue read/write workload with 1000000 keys inserted in a GCP cluster with no "transaction expired" error. ``` java -jar ~/code/yugabyte/java/yb-loadtester/target/yb-sample-apps.jar -workload CassandraTransactionalKeyValue -num_threads_read 64 -num_threads_write 8 -nodes ... ``` Reviewers: mikhail, sergei Reviewed By: sergei Differential Revision: https://phabricator.dev.yugabyte.com/D4615 --- src/yb/client/ql-transaction-test.cc | 2 +- src/yb/tablet/transaction_coordinator.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/yb/client/ql-transaction-test.cc b/src/yb/client/ql-transaction-test.cc index 247f70f97f11..5b6c79846d3c 100644 --- a/src/yb/client/ql-transaction-test.cc +++ b/src/yb/client/ql-transaction-test.cc @@ -42,7 +42,7 @@ using namespace std::literals; // NOLINT -DECLARE_uint64(transaction_timeout_usec); +DECLARE_int64(transaction_timeout_usec); DECLARE_uint64(transaction_heartbeat_usec); DECLARE_uint64(transaction_table_num_tablets); DECLARE_uint64(log_segment_size_bytes); diff --git a/src/yb/tablet/transaction_coordinator.cc b/src/yb/tablet/transaction_coordinator.cc index 1376ac09acdd..fecad194b834 100644 --- a/src/yb/tablet/transaction_coordinator.cc +++ b/src/yb/tablet/transaction_coordinator.cc @@ -55,7 +55,7 @@ #include "yb/util/tsan_util.h" DECLARE_uint64(transaction_heartbeat_usec); 
-DEFINE_uint64(transaction_timeout_usec, 1500000, "Transaction expiration timeout in usec."); +DEFINE_int64(transaction_timeout_usec, 1500000, "Transaction expiration timeout in usec."); DEFINE_uint64(transaction_check_interval_usec, 500000, "Transaction check interval in usec."); DEFINE_double(transaction_ignore_applying_probability_in_tests, 0, "Probability to ignore APPLYING update in tests."); @@ -149,7 +149,7 @@ class TransactionState { if (ShouldBeCommitted()) { return false; } - auto passed = now.GetPhysicalValueMicros() - last_touch_.GetPhysicalValueMicros(); + const int64_t passed = now.GetPhysicalValueMicros() - last_touch_.GetPhysicalValueMicros(); return passed > FLAGS_transaction_timeout_usec; }