Skip to content

Commit

Permalink
ENG-4240: #613: Fix checking of tablet presence during transaction CL…
Browse files Browse the repository at this point in the history
…EANUP

Summary:
We were failing to check the return code of the function `LookupTablePeerOrRespond` when CLEANUP request is received by tablet service.
This was causing the following FATAL right after restart during software upgrade on a cluster with SecondaryIndex workload.

```#0  yb::tserver::TabletServiceImpl::CheckMemoryPressure<yb::tserver::UpdateTransactionResponsePB> (this=this@entry=0x24c2e00, tablet=tablet@entry=0x0,
    resp=resp@entry=0x14d3d410, context=context@entry=0x7f55b1eb5600) at ../../src/yb/tserver/tablet_service.cc:222
#1  0x00007f55d4c8a881 in yb::tserver::TabletServiceImpl::UpdateTransaction (this=this@entry=0x24c2e00, req=req@entry=0x1057aa90, resp=resp@entry=0x14d3d410, context=...)
    at ../../src/yb/tserver/tablet_service.cc:431
#2  0x00007f55d273f28a in yb::tserver::TabletServerServiceIf::Handle (this=0x24c2e00, call=...) at src/yb/tserver/tserver_service.service.cc:267
#3  0x00007f55cff0a3ea in yb::rpc::ServicePoolImpl::Handle (this=0x27ca540, incoming=...) at ../../src/yb/rpc/service_pool.cc:214```

Changed LookupTablePeerOrRespond to return complete result using return value.

Test Plan: Update xdc-user-identity and check that is does not crash and workload is stable.

Reviewers: robert, hector, mikhail, kannan

Reviewed By: mikhail, kannan

Subscribers: kannan, ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D5772
  • Loading branch information
spolitov committed Nov 30, 2018
1 parent 1b01b96 commit 566d6d2
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 75 deletions.
20 changes: 11 additions & 9 deletions src/yb/client/ql-transaction-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1130,10 +1130,12 @@ TEST_F(QLTransactionTest, CorrectStatusRequestBatching) {
while (!stop) {
auto txn = CreateTransaction();
session->SetTransaction(txn);
WriteRow(session, key, value + 1);
auto status = txn->CommitFuture().get();
if (status.ok()) {
++value;
auto write_result = WriteRow(session, key, value + 1);
if (write_result.ok()) {
auto status = txn->CommitFuture().get();
if (status.ok()) {
++value;
}
}
}
});
Expand Down Expand Up @@ -1362,7 +1364,7 @@ TEST_F(QLTransactionTest, WaitRead) {
auto session = CreateSession();
int32_t value = 0;
while (!stop) {
WriteRow(session, i, ++value);
ASSERT_OK(WriteRow(session, i, ++value));
}
});
}
Expand Down Expand Up @@ -1435,7 +1437,7 @@ TEST_F(QLTransactionTest, InsertDeleteWithClusterRestart) {
constexpr int kKeys = 100;

for (int i = 0; i != kKeys; ++i) {
WriteRow(CreateSession(), i /* key */, i * 2 /* value */, WriteOpType::INSERT);
ASSERT_OK(WriteRow(CreateSession(), i /* key */, i * 2 /* value */, WriteOpType::INSERT));
}

auto txn = CreateTransaction();
Expand Down Expand Up @@ -1600,17 +1602,17 @@ TEST_F(QLTransactionTest, DelayedInit) {
auto txn2 = std::make_shared<YBTransaction>(transaction_manager_.get_ptr());

auto write_session = CreateSession();
WriteRow(write_session, 0, 0);
ASSERT_OK(WriteRow(write_session, 0, 0));

ConsistentReadPoint read_point(transaction_manager_->clock());
read_point.SetCurrentReadTime();

WriteRow(write_session, 1, 1);
ASSERT_OK(WriteRow(write_session, 1, 1));

ASSERT_OK(txn1->Init(IsolationLevel::SNAPSHOT_ISOLATION, read_point.GetReadTime()));
ASSERT_OK(txn2->Init(IsolationLevel::SNAPSHOT_ISOLATION));

WriteRow(write_session, 2, 2);
ASSERT_OK(WriteRow(write_session, 2, 2));

{
auto read_session = CreateSession(txn1);
Expand Down
33 changes: 18 additions & 15 deletions src/yb/tserver/service_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,33 +103,34 @@ StdStatusCallback BindHandleResponse(RespType* resp,
//
// Returns true if successful.
template<class RespClass>
bool LookupTabletPeerOrRespond(TabletPeerLookupIf* tablet_manager,
const string& tablet_id,
RespClass* resp,
rpc::RpcContext* context,
std::shared_ptr<tablet::TabletPeer>* peer) {
Status status = tablet_manager->GetTabletPeer(tablet_id, peer);
Result<std::shared_ptr<tablet::TabletPeer>> LookupTabletPeerOrRespond(
TabletPeerLookupIf* tablet_manager,
const string& tablet_id,
RespClass* resp,
rpc::RpcContext* context) {
std::shared_ptr<tablet::TabletPeer> result;
Status status = tablet_manager->GetTabletPeer(tablet_id, &result);
if (PREDICT_FALSE(!status.ok())) {
TabletServerErrorPB::Code code = status.IsServiceUnavailable() ?
TabletServerErrorPB::UNKNOWN_ERROR :
TabletServerErrorPB::TABLET_NOT_FOUND;
SetupErrorAndRespond(resp->mutable_error(), status, code, context);
return false;
return status;
}

// Check RUNNING state.
tablet::TabletStatePB state = (*peer)->state();
tablet::TabletStatePB state = result->state();
if (PREDICT_FALSE(state != tablet::RUNNING)) {
Status s = STATUS(IllegalState, "Tablet not RUNNING",
tablet::TabletStatePB_Name(state));
Status s = STATUS(IllegalState, "Tablet not RUNNING", tablet::TabletStatePB_Name(state));
if (state == tablet::FAILED) {
s = s.CloneAndAppend((*peer)->error().ToString());
s = s.CloneAndAppend(result->error().ToString());
}
SetupErrorAndRespond(resp->mutable_error(), s,
TabletServerErrorPB::TABLET_NOT_RUNNING, context);
return false;
return s;
}
return true;

return result;
}

// A transaction completion callback that responds to the client when transactions
Expand Down Expand Up @@ -196,10 +197,12 @@ LeaderTabletPeer LookupLeaderTabletOrRespond(
const std::string& tablet_id,
RespClass* resp,
rpc::RpcContext* context) {
LeaderTabletPeer result;
if (!LookupTabletPeerOrRespond(tablet_manager, tablet_id, resp, context, &result.peer)) {
auto peer = LookupTabletPeerOrRespond(tablet_manager, tablet_id, resp, context);
if (!peer.ok()) {
return LeaderTabletPeer();
}
LeaderTabletPeer result;
result.peer = *peer;

if (!result.FillTerm(resp->mutable_error(), context)) {
return LeaderTabletPeer();
Expand Down
80 changes: 34 additions & 46 deletions src/yb/tserver/tablet_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,13 @@ void TabletServiceAdminImpl::AlterSchema(const AlterSchemaRequestPB* req,
std::move(operation_state)), tablet.leader_term);
}

#define VERIFY_RESULT_OR_RETURN(expr) \
__extension__ ({ \
auto&& __result = (expr); \
if (!__result.ok()) { return; } \
std::move(*__result); \
})

void TabletServiceImpl::UpdateTransaction(const UpdateTransactionRequestPB* req,
UpdateTransactionResponsePB* resp,
rpc::RpcContext context) {
Expand All @@ -424,8 +431,8 @@ void TabletServiceImpl::UpdateTransaction(const UpdateTransactionRequestPB* req,
tablet = LookupLeaderTabletOrRespond(
server_->tablet_peer_lookup(), req->tablet_id(), resp, &context);
} else {
LookupTabletPeerOrRespond(server_->tablet_peer_lookup(), req->tablet_id(), resp, &context,
&tablet.peer);
tablet.peer = VERIFY_RESULT_OR_RETURN(LookupTabletPeerOrRespond(
server_->tablet_peer_lookup(), req->tablet_id(), resp, &context));
tablet.leader_term = OpId::kUnknownTerm;
}
if (!tablet || !CheckMemoryPressure(tablet.peer->tablet(), resp, &context)) {
Expand All @@ -452,14 +459,8 @@ void TabletServiceImpl::GetTransactionStatus(const GetTransactionStatusRequestPB

UpdateClock(*req, server_->Clock());

tablet::TabletPeerPtr tablet_peer;
if (!LookupTabletPeerOrRespond(server_->tablet_peer_lookup(),
req->tablet_id(),
resp,
&context,
&tablet_peer)) {
return;
}
auto tablet_peer = VERIFY_RESULT_OR_RETURN(LookupTabletPeerOrRespond(
server_->tablet_peer_lookup(), req->tablet_id(), resp, &context));

auto status = CheckPeerIsLeaderAndReady(*tablet_peer);
if (!status.ok()) {
Expand Down Expand Up @@ -660,11 +661,8 @@ void TabletServiceAdminImpl::FlushTablets(const FlushTabletsRequestPB* req,
for (const TabletId& id : req->tablet_ids()) {
resp->set_failed_tablet_id(id);

TabletPeerPtr tablet_peer;
if (!LookupTabletPeerOrRespond(server_->tablet_peer_lookup(), id, resp, &context,
&tablet_peer)) {
return;
}
auto tablet_peer = VERIFY_RESULT_OR_RETURN(LookupTabletPeerOrRespond(
server_->tablet_peer_lookup(), id, resp, &context));

RETURN_UNKNOWN_ERROR_IF_NOT_OK(
tablet_peer->tablet()->Flush(tablet::FlushMode::kAsync), resp, &context);
Expand Down Expand Up @@ -777,13 +775,16 @@ bool TabletServiceImpl::GetTabletOrRespond(const ReadRequestPB* req,
template <class Req, class Resp>
bool TabletServiceImpl::DoGetTabletOrRespond(const Req* req, Resp* resp, rpc::RpcContext* context,
std::shared_ptr<tablet::AbstractTablet>* tablet) {
TabletPeerPtr tablet_peer;
if (!LookupTabletPeerOrRespond(server_->tablet_peer_lookup(), req->tablet_id(), resp, context,
&tablet_peer)) {
auto tablet_peer_result = LookupTabletPeerOrRespond(
server_->tablet_peer_lookup(), req->tablet_id(), resp, context);

if (!tablet_peer_result.ok()) {
return false;
}

Status s = CheckPeerIsReady(*tablet_peer.get());
auto tablet_peer = std::move(*tablet_peer_result);

Status s = CheckPeerIsReady(*tablet_peer);
if (PREDICT_FALSE(!s.ok())) {
SetupErrorAndRespond(resp->mutable_error(), s, context);
return false;
Expand All @@ -801,7 +802,7 @@ bool TabletServiceImpl::DoGetTabletOrRespond(const Req* req, Resp* resp, rpc::Rp
"consistency level is invalid: YBConsistencyLevel::STRONG";
}

s = CheckPeerIsLeader(*tablet_peer.get());
s = CheckPeerIsLeader(*tablet_peer);
if (PREDICT_FALSE(!s.ok())) {
SetupErrorAndRespond(resp->mutable_error(), s, context);
return false;
Expand Down Expand Up @@ -1136,10 +1137,8 @@ void ConsensusServiceImpl::UpdateConsensus(const ConsensusRequestPB* req,
if (!CheckUuidMatchOrRespond(tablet_manager_, "UpdateConsensus", req, resp, &context)) {
return;
}
TabletPeerPtr tablet_peer;
if (!LookupTabletPeerOrRespond(tablet_manager_, req->tablet_id(), resp, &context, &tablet_peer)) {
return;
}
auto tablet_peer = VERIFY_RESULT_OR_RETURN(LookupTabletPeerOrRespond(
tablet_manager_, req->tablet_id(), resp, &context));

// Submit the update directly to the TabletPeer's Consensus instance.
shared_ptr<Consensus> consensus;
Expand Down Expand Up @@ -1170,10 +1169,8 @@ void ConsensusServiceImpl::RequestConsensusVote(const VoteRequestPB* req,
if (!CheckUuidMatchOrRespond(tablet_manager_, "RequestConsensusVote", req, resp, &context)) {
return;
}
TabletPeerPtr tablet_peer;
if (!LookupTabletPeerOrRespond(tablet_manager_, req->tablet_id(), resp, &context, &tablet_peer)) {
return;
}
auto tablet_peer = VERIFY_RESULT_OR_RETURN(LookupTabletPeerOrRespond(
tablet_manager_, req->tablet_id(), resp, &context));

// Submit the vote request directly to the consensus instance.
shared_ptr<Consensus> consensus;
Expand All @@ -1195,11 +1192,8 @@ void ConsensusServiceImpl::ChangeConfig(const ChangeConfigRequestPB* req,
!CheckUuidMatchOrRespond(tablet_manager_, "ChangeConfig", req, resp, &context)) {
return;
}
TabletPeerPtr tablet_peer;
if (!LookupTabletPeerOrRespond(tablet_manager_, req->tablet_id(), resp, &context,
&tablet_peer)) {
return;
}
auto tablet_peer = VERIFY_RESULT_OR_RETURN(LookupTabletPeerOrRespond(
tablet_manager_, req->tablet_id(), resp, &context));

shared_ptr<Consensus> consensus;
if (!GetConsensusOrRespond(tablet_peer, resp, &context, &consensus)) return;
Expand Down Expand Up @@ -1241,10 +1235,8 @@ class RpcScope {
if (!CheckUuidMatchOrRespond(tablet_manager, method_name, req, resp, context)) {
return;
}
TabletPeerPtr tablet_peer;
if (!LookupTabletPeerOrRespond(tablet_manager, req->tablet_id(), resp, context, &tablet_peer)) {
return;
}
auto tablet_peer = VERIFY_RESULT_OR_RETURN(LookupTabletPeerOrRespond(
tablet_manager, req->tablet_id(), resp, context));

if (!GetConsensusOrRespond(tablet_peer, resp, context, &consensus_)) {
return;
Expand Down Expand Up @@ -1339,10 +1331,8 @@ void ConsensusServiceImpl::GetLastOpId(const consensus::GetLastOpIdRequestPB *re
if (!CheckUuidMatchOrRespond(tablet_manager_, "GetLastOpId", req, resp, &context)) {
return;
}
TabletPeerPtr tablet_peer;
if (!LookupTabletPeerOrRespond(tablet_manager_, req->tablet_id(), resp, &context, &tablet_peer)) {
return;
}
auto tablet_peer = VERIFY_RESULT_OR_RETURN(LookupTabletPeerOrRespond(
tablet_manager_, req->tablet_id(), resp, &context));

if (tablet_peer->state() != tablet::RUNNING) {
SetupErrorAndRespond(resp->mutable_error(),
Expand Down Expand Up @@ -1511,11 +1501,9 @@ void TabletServiceImpl::Checksum(const ChecksumRequestPB* req,
void TabletServiceImpl::ImportData(const ImportDataRequestPB* req,
ImportDataResponsePB* resp,
rpc::RpcContext context) {
tablet::TabletPeerPtr peer;
if (!LookupTabletPeerOrRespond(server_->tablet_peer_lookup(), req->tablet_id(), resp, &context,
&peer)) {
return;
}
auto peer = VERIFY_RESULT_OR_RETURN(LookupTabletPeerOrRespond(
server_->tablet_peer_lookup(), req->tablet_id(), resp, &context));

auto status = peer->tablet()->ImportData(req->source_dir());
if (!status.ok()) {
SetupErrorAndRespond(resp->mutable_error(),
Expand Down
9 changes: 5 additions & 4 deletions src/yb/tserver/tablet_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,11 @@ class TabletServiceImpl : public TabletServerServiceIf {
bool DoGetTabletOrRespond(const Req* req, Resp* resp, rpc::RpcContext* context,
std::shared_ptr<tablet::AbstractTablet>* tablet);

virtual bool GetTabletOrRespond(const ReadRequestPB* req,
ReadResponsePB* resp,
rpc::RpcContext* context,
std::shared_ptr<tablet::AbstractTablet>* tablet);
virtual WARN_UNUSED_RESULT bool GetTabletOrRespond(
const ReadRequestPB* req,
ReadResponsePB* resp,
rpc::RpcContext* context,
std::shared_ptr<tablet::AbstractTablet>* tablet);

template<class Resp>
bool CheckMemoryPressure(
Expand Down
8 changes: 7 additions & 1 deletion src/yb/util/result.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,14 @@ struct ResultTraits<TValue&> {
static TValue* GetPtr(const Stored* value) { return *value; }
};

#ifdef __clang__
#define NODISCARD_CLASS [[nodiscard]]
#else
#define NODISCARD_CLASS
#endif

template<class TValue>
class Result {
class NODISCARD_CLASS Result {
public:
typedef ResultTraits<TValue> Traits;

Expand Down

0 comments on commit 566d6d2

Please sign in to comment.