Skip to content

Commit

Permalink
ENG-2960: Set upperbound for intent iterator in IntentAwareIterator.
Browse files Browse the repository at this point in the history
Summary:
This revision fixes the drop in read ops on tables that have distributed transactions enabled. The
drop happened because the intent iterator in IntentAwareIterator had to scan over deleted intents
that accumulated over time. By setting an upperbound for the intent iterator in
IntentAwareIterator, it avoids scanning over those deleted intents unnecessarily.

Test Plan:
```
java -jar ~/code/yugabyte/java/yb-loadtester/target/yb-sample-apps.jar -workload CassandraTransactionalKeyValue -num_threads_read 8 -num_threads_write 2 -nodes 127.0.0.1:9042
...
2018-03-08 23:57:16,186 [INFO|com.yugabyte.sample.common.metrics.MetricsTracker|MetricsTracker] Read: 1199.65 ops/sec (6.67 ms/op), 45715 total ops  |  Write: 166.04 ops/sec (12.04 ms/op), 6350 total ops  |  Uptime: 40025 ms | maxWrittenKey: 6348 | maxGeneratedKey: 6351 |
2018-03-08 23:57:21,189 [INFO|com.yugabyte.sample.common.metrics.MetricsTracker|MetricsTracker] Read: 1219.43 ops/sec (6.56 ms/op), 51816 total ops  |  Write: 163.10 ops/sec (12.27 ms/op), 7166 total ops  |  Uptime: 45028 ms | maxWrittenKey: 7165 | maxGeneratedKey: 7167 |
2018-03-08 23:57:26,190 [INFO|com.yugabyte.sample.common.metrics.MetricsTracker|MetricsTracker] Read: 1268.32 ops/sec (6.31 ms/op), 58159 total ops  |  Write: 160.56 ops/sec (12.43 ms/op), 7969 total ops  |  Uptime: 50029 ms | maxWrittenKey: 7967 | maxGeneratedKey: 7970 |
2018-03-08 23:57:31,195 [INFO|com.yugabyte.sample.common.metrics.MetricsTracker|MetricsTracker] Read: 1256.21 ops/sec (6.37 ms/op), 64446 total ops  |  Write: 166.04 ops/sec (12.06 ms/op), 8800 total ops  |  Uptime: 55034 ms | maxWrittenKey: 8799 | maxGeneratedKey: 8801 |
2018-03-08 23:57:36,196 [INFO|com.yugabyte.sample.common.metrics.MetricsTracker|MetricsTracker] Read: 1187.12 ops/sec (6.74 ms/op), 70383 total ops  |  Write: 164.36 ops/sec (12.18 ms/op), 9622 total ops  |  Uptime: 60035 ms | maxWrittenKey: 9621 | maxGeneratedKey: 9623 |
```

Reviewers: sergei

Reviewed By: sergei

Subscribers: ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D4256
  • Loading branch information
robertpang committed Mar 13, 2018
1 parent 7147469 commit e0b78dc
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 5 deletions.
49 changes: 44 additions & 5 deletions src/yb/docdb/intent_aware_iterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,9 @@ IntentAwareIterator::IntentAwareIterator(
intent_iter_ = docdb::CreateRocksDBIterator(rocksdb,
docdb::BloomFilterMode::DONT_USE_BLOOM_FILTER,
boost::none,
rocksdb::kDefaultQueryId);
rocksdb::kDefaultQueryId,
nullptr /* file_filter */,
&intent_upperbound_);
}
iter_.reset(rocksdb->NewIterator(read_opts));
}
Expand All @@ -239,6 +241,10 @@ void IntentAwareIterator::Seek(const Slice& key) {
ROCKSDB_SEEK(iter_.get(), key);
SkipFutureRecords();
if (intent_iter_) {
status_ = SetIntentUpperbound();
if (!status_.ok()) {
return;
}
ROCKSDB_SEEK(intent_iter_.get(), GetIntentPrefixForKeyWithoutHt(key));
SeekForwardToSuitableIntent();
}
Expand All @@ -256,6 +262,10 @@ void IntentAwareIterator::SeekForward(const Slice& key) {
auto key_bytes = AppendDocHt(key, DocHybridTime(read_time_.global_limit, kMaxWriteId));
SeekForwardRegular(key_bytes);
if (intent_iter_ && status_.ok()) {
status_ = SetIntentUpperbound();
if (!status_.ok()) {
return;
}
GetIntentPrefixForKeyWithoutHt(key, &key_bytes);
SeekForwardToSuitableIntent(key_bytes);
}
Expand All @@ -270,6 +280,10 @@ void IntentAwareIterator::SeekPastSubKey(const Slice& key) {
docdb::SeekPastSubKey(key, iter_.get());
SkipFutureRecords();
if (intent_iter_ && status_.ok()) {
status_ = SetIntentUpperbound();
if (!status_.ok()) {
return;
}
KeyBytes intent_prefix = GetIntentPrefixForKeyWithoutHt(key);
// Skip all intents for subdoc_key.
intent_prefix.mutable_data()->push_back(static_cast<char>(ValueType::kIntentType) + 1);
Expand All @@ -285,6 +299,10 @@ void IntentAwareIterator::SeekOutOfSubDoc(const Slice& key) {

docdb::SeekOutOfSubKey(key, iter_.get());
if (intent_iter_ && status_.ok()) {
status_ = SetIntentUpperbound();
if (!status_.ok()) {
return;
}
KeyBytes intent_prefix = GetIntentPrefixForKeyWithoutHt(key);
// See comment for SubDocKey::AdvanceOutOfSubDoc.
intent_prefix.AppendValueType(ValueType::kMaxByte);
Expand Down Expand Up @@ -435,7 +453,9 @@ void IntentAwareIterator::SeekForwardToSuitableIntent(const KeyBytes &intent_key
resolved_intent_key_prefix_.CompareTo(intent_key_prefix) >= 0) {
return;
}
docdb::SeekForward(intent_key_prefix, intent_iter_.get());
// Use ROCKSDB_SEEK() to force a re-seek of "intent_iter_" in case the iterator became invalid under
// the previous intent upperbound and the upperbound has since changed, which requires a re-seek.
ROCKSDB_SEEK(intent_iter_.get(), intent_key_prefix.AsSlice());
SeekForwardToSuitableIntent();
}

Expand All @@ -448,9 +468,6 @@ void IntentAwareIterator::SeekForwardToSuitableIntent() {
// Find latest suitable intent for the first SubDocKey having suitable intents.
while (intent_iter_->Valid()) {
auto intent_key = intent_iter_->key();
if (GetKeyType(intent_key) != KeyType::kIntentKey) {
break;
}
VLOG(4) << "Intent found: " << DebugIntentKeyToString(intent_key)
<< ", resolved state: " << yb::ToString(resolved_intent_state_);
if (resolved_intent_state_ != ResolvedIntentState::kNoIntent &&
Expand Down Expand Up @@ -651,5 +668,27 @@ void IntentAwareIterator::SkipFutureIntents() {
SeekForwardToSuitableIntent();
}

// Recomputes the exclusive upperbound of the intent iterator from the current position of the
// regular iterator and stores it in intent_upperbound_ (backed by intent_upperbound_keybytes_).
// Returns a non-OK status if DocHybridTime::CheckAndGetEncodedSize fails on the regular key.
Status IntentAwareIterator::SetIntentUpperbound() {
  // Every upperbound begins with the intent prefix.
  intent_upperbound_keybytes_.Clear();
  intent_upperbound_keybytes_.AppendValueType(ValueType::kIntentPrefix);

  if (!iter_->Valid()) {
    // The regular iterator has no current entry, so place the exclusive upperbound at the
    // beginning of the transaction metadata region.
    intent_upperbound_keybytes_.AppendValueType(ValueType::kTransactionId);
  } else {
    // Drop the trailing ValueType::kHybridTime byte and the encoded DocHybridTime from the
    // regular iterator's SubDocKey, then terminate the upperbound with 0xff.
    Slice regular_key = iter_->key();
    int encoded_ht_size = 0;
    RETURN_NOT_OK(DocHybridTime::CheckAndGetEncodedSize(regular_key, &encoded_ht_size));
    regular_key.remove_suffix(encoded_ht_size + 1);
    intent_upperbound_keybytes_.AppendRawBytes(regular_key);
    intent_upperbound_keybytes_.AppendValueType(ValueType::kMaxByte);
  }

  // Publish the freshly built bytes as the slice the intent iterator was created with.
  intent_upperbound_ = intent_upperbound_keybytes_.AsSlice();
  VLOG(4) << "SetIntentUpperbound = " << intent_upperbound_.ToDebugString();
  return Status::OK();
}

} // namespace docdb
} // namespace yb
9 changes: 9 additions & 0 deletions src/yb/docdb/intent_aware_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,11 @@ class IntentAwareIterator {
// Whether current entry is regular key-value pair.
bool IsEntryRegular();

// Set the exclusive upperbound of the intent iterator based on the current SubDocKey of the
// regular iterator. This is necessary to prevent the RocksDB iterator from unnecessarily scanning
// over deleted intents beyond the current regular key.
//
// When the regular iterator is valid, the upperbound is the intent prefix followed by the regular
// key (with its hybrid time suffix stripped) and a max byte. Otherwise the upperbound is the
// beginning of the transaction metadata region.
//
// Returns a non-OK status if the encoded hybrid time size of the regular key cannot be determined.
CHECKED_STATUS SetIntentUpperbound();

const ReadHybridTime read_time_;
const TransactionOperationContextOpt txn_op_context_;
std::unique_ptr<rocksdb::Iterator> intent_iter_;
Expand All @@ -194,6 +199,10 @@ class IntentAwareIterator {
Status status_;
HybridTime max_seen_ht_ = HybridTime::kMin;

// Exclusive upperbound of the intent key.
KeyBytes intent_upperbound_keybytes_;
Slice intent_upperbound_;

// Following fields contain information related to resolved suitable intent.
ResolvedIntentState resolved_intent_state_ = ResolvedIntentState::kNoIntent;
// kIntentPrefix + SubDocKey (no HT).
Expand Down

0 comments on commit e0b78dc

Please sign in to comment.