Skip to content

Commit

Permalink
[#16212] YSQL: Fix conflict detection in serializable isolation in ca…
Browse files Browse the repository at this point in the history
…se of explicit row locking

Summary:
There are 2 situations when read operations create intents:
- read in context of transaction with `SERIALIZABLE` isolation level
- read with explicit row lock (such as `SELECT ... FOR KEY SHARE`)

In case of both conditions are met (`SELECT ... FOR KEY SHARE` within `SERIALIZABLE` txn) YB creates intents for explicit row lock only.
As a result YB may detects conflict incorrectly (because row lock intents can be weaker than serializable read intents)

Example:

```
-- Txn1:
START TRANSACTION ISOLATION LEVEL SERIALIZABLE;
UPDATE t SET v = 100 WHERE k = 1;

-- Txn2:
START TRANSACTION ISOLATION LEVEL SERIALIZABLE;
SELECT * FROM t WHERE k = 1; -- has conflict with txn1 OK

-- Txn3:
START TRANSACTION ISOLATION LEVEL SERIALIZABLE;
SELECT * FROM t WHERE k = 1 FOR KEY SHARE; -- has no conflicts with txn1 FAIL
```

Solution is to create both types of intents (serializable read + row lock) in case both conditions are met.

**Note:**
Actually 2 intents will be create only in row lock intent are stronger than serializable read intent.
Jira: DB-5641

Test Plan:
New unit test is introduced

```
./yb_build.sh --cxx-test pg_misc_conflicts-test --gtest_filter PgMiscConflictsTest/PgRowLockTest.SerializableTxnUpdate/*
```

Reviewers: sergei, pjain, rsami, tnayak, tvesely

Reviewed By: pjain

Subscribers: mbautin, yql, ybase, bogdan

Differential Revision: https://phorge.dev.yugabyte.com/D24861
  • Loading branch information
d-uspenskiy committed Sep 4, 2023
1 parent c2694db commit decb104
Show file tree
Hide file tree
Showing 15 changed files with 329 additions and 158 deletions.
15 changes: 9 additions & 6 deletions src/yb/docdb/conflict_resolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1000,7 +1000,7 @@ class TransactionConflictResolverContext : public ConflictResolverContextBase {
/* intent_value */ Slice(),
[&intent_processor, intent_types](
auto ancestor_doc_key, dockv::FullDocKey full_doc_key, auto, auto intent_key,
auto) {
auto, auto) {
intent_processor.Process(ancestor_doc_key, full_doc_key, intent_key, intent_types);
return Status::OK();
},
Expand All @@ -1010,13 +1010,16 @@ class TransactionConflictResolverContext : public ConflictResolverContextBase {
}

const auto& pairs = write_batch_.read_pairs();
intent_types = dockv::GetIntentTypesForRead(metadata_.isolation, row_mark);
if (!pairs.empty()) {
RETURN_NOT_OK(EnumerateIntents(
pairs,
[&intent_processor, intent_types] (
auto ancestor_doc_key, dockv::FullDocKey full_doc_key, auto, auto intent_key, auto) {
intent_processor.Process(ancestor_doc_key, full_doc_key, intent_key, intent_types);
[&intent_processor,
intent_types = dockv::GetIntentTypesForRead(metadata_.isolation, row_mark)] (
auto ancestor_doc_key, auto full_doc_key, auto, auto* intent_key, auto,
auto is_row_lock) {
intent_processor.Process(
ancestor_doc_key, full_doc_key, intent_key,
GetIntentTypes(intent_types, is_row_lock));
return Status::OK();
},
resolver->partial_range_key_intents()));
Expand Down Expand Up @@ -1213,7 +1216,7 @@ class OperationConflictResolverContext : public ConflictResolverContextBase {
/* intent_value */ Slice(),
[&intent_processor, intent_types](
auto ancestor_doc_key, dockv::FullDocKey full_doc_key, auto, auto intent_key,
auto) {
auto, auto) {
intent_processor.Process(ancestor_doc_key, full_doc_key, intent_key, intent_types);
return Status::OK();
},
Expand Down
9 changes: 4 additions & 5 deletions src/yb/docdb/docdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -169,15 +169,14 @@ Result<DetermineKeysToLockResult> DetermineKeysToLock(
}

if (!read_pairs.empty()) {
const auto read_intent_types = dockv::GetIntentTypesForRead(isolation_level, row_mark_type);
RETURN_NOT_OK(EnumerateIntents(
read_pairs,
[&result, &read_intent_types](
dockv::AncestorDocKey ancestor_doc_key, dockv::FullDocKey, Slice, KeyBytes* key,
dockv::LastKey) {
[&result, intent_types = dockv:: GetIntentTypesForRead(isolation_level, row_mark_type)](
auto ancestor_doc_key, auto, auto, auto* key, auto, auto is_row_lock) {
auto actual_intents = GetIntentTypes(intent_types, is_row_lock);
return ApplyIntent(
RefCntPrefix(key->AsSlice()),
ancestor_doc_key ? MakeWeak(read_intent_types) : read_intent_types,
ancestor_doc_key ? MakeWeak(actual_intents) : actual_intents,
&result.lock_batch);
}, partial_range_key_intents));
}
Expand Down
91 changes: 70 additions & 21 deletions src/yb/docdb/pgsql_operation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -178,20 +178,68 @@ class DocKeyColumnPathBuilderHolder {

YB_STRONGLY_TYPED_BOOL(KeyOnlyRequested);

YB_DEFINE_ENUM(IntentFeature, (kRegularRead)(kKeyOnlyRead)(kRowLock));

// Helper class to describe what kind of intents must be created on particular doc key:
// - regular_read - intent for regular read is required
// - key_only_read - intent for key column only read is required
// - row_lock - intent for row lock is required.
// Note: regular_read and key_only_read can't be both equaled true.
class IntentMode {
public:
IntentMode(IsolationLevel level, RowMarkType row_mark, KeyOnlyRequested key_only_requested) {
auto intent_types = dockv::GetIntentTypesForRead(level, row_mark);
const auto& read_intents = intent_types.read;
auto& lock_intents = intent_types.row_mark;
if (!read_intents.None()) {
auto all_read_intents = MakeWeak(read_intents);
if (key_only_requested) {
features_.Set(IntentFeature::kKeyOnlyRead);
} else {
all_read_intents |= read_intents;
features_.Set(IntentFeature::kRegularRead);
}
lock_intents &= ~(all_read_intents);
}
if (!lock_intents.None()) {
features_.Set(IntentFeature::kRowLock);
}
}

[[nodiscard]] bool regular_read() const {
return features_.Test(IntentFeature::kRegularRead);
}

[[nodiscard]] bool key_only_read() const {
return features_.Test(IntentFeature::kKeyOnlyRead);
}

[[nodiscard]] bool row_lock() const {
return features_.Test(IntentFeature::kRowLock);
}

private:
EnumBitSet<IntentFeature> features_;
};

class IntentInserter {
public:
explicit IntentInserter(LWKeyValueWriteBatchPB* out)
: out_(*out) {}

void Add(Slice encoded_key, KeyOnlyRequested key_only_requested) {
if (!key_only_requested) {
void Add(Slice encoded_key, const IntentMode& mode) {
if (mode.regular_read()) {
Add(encoded_key);
return;
} else if (mode.key_only_read()) {
auto& buf = buffer();
buf.Clear();
DocKeyColumnPathBuilder doc_key_builder(&buf, encoded_key);
Add(doc_key_builder.Build(dockv::SystemColumnIds::kLivenessColumn));
}

if (mode.row_lock()) {
Add(encoded_key, /*row_lock=*/ true);
}
auto& buf = buffer();
buf.Clear();
DocKeyColumnPathBuilder doc_key_builder(&buf, encoded_key);
Add(doc_key_builder.Build(dockv::SystemColumnIds::kLivenessColumn));
}

private:
Expand All @@ -202,10 +250,13 @@ class IntentInserter {
return *buffer_;
}

void Add(Slice encoded_key) {
void Add(Slice encoded_key, bool is_row_lock = false) {
auto& pair = *out_.add_read_pairs();
pair.dup_key(encoded_key);
pair.dup_value(Slice(&dockv::ValueEntryTypeAsChar::kNullLow, 1));
pair.dup_value(Slice(
is_row_lock ? &dockv::ValueEntryTypeAsChar::kRowLock
: &dockv::ValueEntryTypeAsChar::kNullLow,
1));
}

std::optional<dockv::KeyBytes> buffer_;
Expand Down Expand Up @@ -1990,10 +2041,10 @@ Status PgsqlReadOperation::PopulateAggregate(WriteBuffer *result_buffer) {
return Status::OK();
}

Status PgsqlReadOperation::GetIntents(const Schema& schema, LWKeyValueWriteBatchPB* out) {
const auto has_row_mark = IsValidRowMarkType(
request_.has_row_mark_type() ? request_.row_mark_type() : ROW_MARK_ABSENT);
if (has_row_mark) {
Status PgsqlReadOperation::GetIntents(
const Schema& schema, IsolationLevel level, LWKeyValueWriteBatchPB* out) {
const auto row_mark = request_.has_row_mark_type() ? request_.row_mark_type() : ROW_MARK_ABSENT;
if (IsValidRowMarkType(row_mark)) {
RSTATUS_DCHECK(request_.has_wait_policy(), IllegalState, "wait policy is expected");
out->set_wait_policy(request_.wait_policy());
}
Expand All @@ -2003,21 +2054,19 @@ Status PgsqlReadOperation::GetIntents(const Schema& schema, LWKeyValueWriteBatch

DocKeyAccessor accessor(schema);
if (!(has_batch_arguments || request_.has_ybctid_column_value())) {
inserter.Add(VERIFY_RESULT(accessor.GetEncoded(request_)), KeyOnlyRequested::kFalse);
inserter.Add(
VERIFY_RESULT(accessor.GetEncoded(request_)), {level, row_mark, KeyOnlyRequested::kFalse});
return Status::OK();
}

// TODO(dmitry): the '!has_row_mark' requirement will be removed in context of fix for #16212.
const KeyOnlyRequested key_only_requested{
!has_row_mark && IsOnlyKeyColumnsRequested(schema, request_)};
const IntentMode mode{
level, row_mark, KeyOnlyRequested(IsOnlyKeyColumnsRequested(schema, request_))};
for (const auto& batch_argument : request_.batch_arguments()) {
inserter.Add(
VERIFY_RESULT(accessor.GetEncoded(batch_argument.ybctid())), key_only_requested);
inserter.Add(VERIFY_RESULT(accessor.GetEncoded(batch_argument.ybctid())), mode);
}
if (!has_batch_arguments) {
DCHECK(request_.has_ybctid_column_value());
inserter.Add(
VERIFY_RESULT(accessor.GetEncoded(request_.ybctid_column_value())), key_only_requested);
inserter.Add(VERIFY_RESULT(accessor.GetEncoded(request_.ybctid_column_value())), mode);
}
return Status::OK();
}
Expand Down
2 changes: 1 addition & 1 deletion src/yb/docdb/pgsql_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ class PgsqlReadOperation : public DocExprExecutor {

Status GetSpecialColumn(ColumnIdRep column_id, QLValuePB* result);

Status GetIntents(const Schema& schema, LWKeyValueWriteBatchPB* out);
Status GetIntents(const Schema& schema, IsolationLevel level, LWKeyValueWriteBatchPB* out);

private:
// Execute a READ operator for a given scalar argument.
Expand Down
35 changes: 20 additions & 15 deletions src/yb/docdb/rocksdb_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -241,35 +241,44 @@ Status TransactionalWriter::Apply(rocksdb::DirectWriteHandler* handler) {
LOG(WARNING) << "Performing a write with row lock " << RowMarkType_Name(row_mark_)
<< " when only reads are expected";
}
intent_types_ = dockv::GetIntentTypesForWrite(isolation_level_);

// We cannot recover from failures here, because it means that we cannot apply replicated
// operation.
RETURN_NOT_OK(EnumerateIntents(
put_batch_.write_pairs(), std::ref(*this), partial_range_key_intents_));
put_batch_.write_pairs(),
[this, intent_types = dockv::GetIntentTypesForWrite(isolation_level_)](
auto ancestor_doc_key, auto full_doc_key, auto value, auto* key, auto last_key, auto) {
return (*this)(intent_types, ancestor_doc_key, full_doc_key, value, key, last_key);
},
partial_range_key_intents_));
}

if (!put_batch_.read_pairs().empty()) {
intent_types_ = dockv::GetIntentTypesForRead(isolation_level_, row_mark_);
RETURN_NOT_OK(EnumerateIntents(
put_batch_.read_pairs(), std::ref(*this), partial_range_key_intents_));
put_batch_.read_pairs(),
[this, intent_types = dockv::GetIntentTypesForRead(isolation_level_, row_mark_)](
auto ancestor_doc_key, auto full_doc_key,
const auto& value, auto* key, auto last_key, auto is_row_lock) {
return (*this)(
dockv::GetIntentTypes(intent_types, is_row_lock), ancestor_doc_key,
full_doc_key, value, key, last_key);
},
partial_range_key_intents_));
}

return Finish();
}

// Using operator() to pass this object conveniently to EnumerateIntents.
Status TransactionalWriter::operator()(
dockv::AncestorDocKey ancestor_doc_key, dockv::FullDocKey, Slice value_slice,
dockv::KeyBytes* key, dockv::LastKey last_key) {
dockv::IntentTypeSet intent_types, dockv::AncestorDocKey ancestor_doc_key, dockv::FullDocKey,
Slice intent_value, dockv::KeyBytes* key, dockv::LastKey last_key) {
if (ancestor_doc_key) {
weak_intents_[key->data()] |= MakeWeak(intent_types_);
weak_intents_[key->data()] |= MakeWeak(intent_types);
return Status::OK();
}

const auto transaction_value_type = ValueEntryTypeAsChar::kTransactionId;
const auto write_id_value_type = ValueEntryTypeAsChar::kWriteId;
const auto row_lock_value_type = ValueEntryTypeAsChar::kRowLock;
IntraTxnWriteId big_endian_write_id = BigEndian::FromHost32(intra_txn_write_id_);

const auto subtransaction_value_type = KeyEntryTypeAsChar::kSubTransactionId;
Expand All @@ -291,17 +300,13 @@ Status TransactionalWriter::operator()(
subtransaction_id,
Slice(&write_id_value_type, 1),
Slice::FromPod(&big_endian_write_id),
value_slice,
intent_value,
}};
// Store a row lock indicator rather than data (in value_slice) for row lock intents.
if (IsValidRowMarkType(row_mark_)) {
value.back() = Slice(&row_lock_value_type, 1);
}

++intra_txn_write_id_;

char intent_type[2] = { KeyEntryTypeAsChar::kIntentTypeSet,
static_cast<char>(intent_types_.ToUIntPtr()) };
static_cast<char>(intent_types.ToUIntPtr()) };

DocHybridTimeBuffer doc_ht_buffer;

Expand Down
8 changes: 4 additions & 4 deletions src/yb/docdb/rocksdb_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,12 @@ class TransactionalWriter : public rocksdb::DirectWriter {
metadata_to_store_ = value;
}

private:
Status operator()(
dockv::AncestorDocKey ancestor_doc_key, dockv::FullDocKey full_doc_key, Slice value_slice,
dockv::KeyBytes* key, dockv::LastKey last_key);
dockv::IntentTypeSet intent_types, dockv::AncestorDocKey ancestor_doc_key,
dockv::FullDocKey full_doc_key, Slice value_slice, dockv::KeyBytes* key,
dockv::LastKey last_key);

private:
Status Finish();
Status AddWeakIntent(
const std::pair<KeyBuffer, dockv::IntentTypeSet>& intent_and_types,
Expand All @@ -107,7 +108,6 @@ class TransactionalWriter : public rocksdb::DirectWriter {
rocksdb::DirectWriteHandler* handler_;
RowMarkType row_mark_;
SubTransactionId subtransaction_id_;
dockv::IntentTypeSet intent_types_;
std::unordered_map<KeyBuffer, dockv::IntentTypeSet, ByteBufferHash> weak_intents_;
};

Expand Down
3 changes: 2 additions & 1 deletion src/yb/dockv/doc_key-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,8 @@ class IntentCollector {
FullDocKey full_doc_key,
Slice value,
KeyBytes* key,
LastKey) {
LastKey,
IsRowLock) {
out_->push_back(CollectedIntent{
.ancestor_doc_key = ancestor_doc_key,
.full_doc_key = full_doc_key,
Expand Down
Loading

0 comments on commit decb104

Please sign in to comment.