Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

issue-506: improve tests, report critical event when request info doe… #633

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions cloud/filestore/libs/diagnostics/critical_events.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,46 @@ void InitCriticalEventsCounter(NMonitoring::TDynamicCountersPtr counters)
// FILESTORE_INIT_CRITICAL_EVENT_COUNTER

FILESTORE_CRITICAL_EVENTS(FILESTORE_INIT_CRITICAL_EVENT_COUNTER)
FILESTORE_IMPOSSIBLE_EVENTS(FILESTORE_INIT_CRITICAL_EVENT_COUNTER)
#undef FILESTORE_INIT_CRITICAL_EVENT_COUNTER

NCloud::InitCriticalEventsCounter(std::move(counters));
}

#define FILESTORE_DEFINE_CRITICAL_EVENT_ROUTINE(name) \
void Report##name() \
TString Report##name(const TString& message) \
{ \
ReportCriticalEvent("AppCriticalEvents/"#name, "", false); \
return ReportCriticalEvent( \
GetCriticalEventFor##name(), \
message, \
false); \
} \
\
const TString GetCriticalEventFor##name() \
{ \
return "AppCriticalEvents/"#name; \
} \
// FILESTORE_DEFINE_CRITICAL_EVENT_ROUTINE

FILESTORE_CRITICAL_EVENTS(FILESTORE_DEFINE_CRITICAL_EVENT_ROUTINE)
#undef FILESTORE_DEFINE_CRITICAL_EVENT_ROUTINE

#define FILESTORE_DEFINE_IMPOSSIBLE_EVENT_ROUTINE(name) \
TString Report##name(const TString& message) \
{ \
return ReportCriticalEvent( \
GetCriticalEventFor##name(), \
message, \
true); \
} \
\
const TString GetCriticalEventFor##name() \
{ \
return "AppCriticalEvents/"#name; \
} \
// FILESTORE_DEFINE_IMPOSSIBLE_EVENT_ROUTINE

FILESTORE_IMPOSSIBLE_EVENTS(FILESTORE_DEFINE_IMPOSSIBLE_EVENT_ROUTINE)
#undef FILESTORE_DEFINE_IMPOSSIBLE_EVENT_ROUTINE

} // namespace NCloud::NFileStore
15 changes: 14 additions & 1 deletion cloud/filestore/libs/diagnostics/critical_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,28 @@ namespace NCloud::NFileStore{
xxx(DescribeFileStoreError) \
// FILESTORE_CRITICAL_EVENTS

#define FILESTORE_IMPOSSIBLE_EVENTS(xxx) \
xxx(CancelRoutineIsNotSet) \
// FILESTORE_IMPOSSIBLE_EVENTS

////////////////////////////////////////////////////////////////////////////////

void InitCriticalEventsCounter(NMonitoring::TDynamicCountersPtr counters);

#define FILESTORE_DECLARE_CRITICAL_EVENT_ROUTINE(name) \
void Report##name(); \
TString Report##name(const TString& message = ""); \
const TString GetCriticalEventFor##name(); \
// FILESTORE_DECLARE_CRITICAL_EVENT_ROUTINE

FILESTORE_CRITICAL_EVENTS(FILESTORE_DECLARE_CRITICAL_EVENT_ROUTINE)
#undef FILESTORE_DECLARE_CRITICAL_EVENT_ROUTINE

#define FILESTORE_DECLARE_IMPOSSIBLE_EVENT_ROUTINE(name) \
TString Report##name(const TString& message = ""); \
const TString GetCriticalEventFor##name(); \
// FILESTORE_DECLARE_IMPOSSIBLE_EVENT_ROUTINE

FILESTORE_IMPOSSIBLE_EVENTS(FILESTORE_DECLARE_IMPOSSIBLE_EVENT_ROUTINE)
#undef FILESTORE_DECLARE_IMPOSSIBLE_EVENT_ROUTINE

} // namespace NCloud::NFileStore
12 changes: 11 additions & 1 deletion cloud/filestore/libs/storage/core/request_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "public.h"

#include <cloud/filestore/libs/diagnostics/critical_events.h>
#include <cloud/filestore/libs/service/context.h>
#include <cloud/filestore/libs/storage/api/events.h>

Expand Down Expand Up @@ -48,6 +49,15 @@ struct TRequestInfo
, CallContext(std::move(callContext))
{}

void CancelRequest(const NActors::TActorContext& ctx)
{
if (!CancelRoutine) {
ReportCancelRoutineIsNotSet();
return;
};
CancelRoutine(ctx, *this);
}

void AddExecCycles(ui64 cycles)
{
AtomicAdd(ExecCycles, cycles);
Expand Down Expand Up @@ -121,7 +131,7 @@ TRequestInfoPtr CreateRequestInfo(
TRequestInfo& requestInfo)
{
auto response = std::make_unique<typename TMethod::TResponse>(
MakeError(E_REJECTED, "tablet is dead"));
MakeError(E_REJECTED, "tablet is shutting down"));

NCloud::Reply(ctx, requestInfo, std::move(response));
};
Expand Down
2 changes: 1 addition & 1 deletion cloud/filestore/libs/storage/tablet/tablet_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ void TIndexTabletActor::TerminateTransactions(const TActorContext& ctx)
TRequestInfo* requestInfo = ActiveTransactions.PopFront();
TABLET_VERIFY(requestInfo->RefCount() >= 1);

requestInfo->CancelRoutine(ctx, *requestInfo);
requestInfo->CancelRequest(ctx);
requestInfo->UnRef();
}
}
Expand Down
4 changes: 2 additions & 2 deletions cloud/filestore/libs/storage/tablet/tablet_actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ class TIndexTabletActor final
TRequestInfo& requestInfo)
{
auto response = std::make_unique<typename TMethod::TResponse>(
MakeError(E_REJECTED, "tablet is dead"));
MakeError(E_REJECTED, "tablet is shutting down"));

NCloud::Reply(ctx, requestInfo, std::move(response));
};
Expand Down Expand Up @@ -281,7 +281,7 @@ class TIndexTabletActor final
TRequestInfo& requestInfo)
{
auto response = std::make_unique<typename TMethod::TResponse>(
MakeError(E_REJECTED, "request cancelled"));
MakeError(E_REJECTED, "tablet is shutting down"));

NCloud::Reply(ctx, requestInfo, std::move(response));
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ void TCleanupSessionsActor::HandlePoisonPill(
const TActorContext& ctx)
{
Y_UNUSED(ev);
ReplyAndDie(ctx, MakeError(E_REJECTED, "request cancelled"));
ReplyAndDie(ctx, MakeError(E_REJECTED, "tablet is shutting down"));
}

void TCleanupSessionsActor::ReplyAndDie(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ void TCollectGarbageActor::HandlePoisonPill(
const TActorContext& ctx)
{
Y_UNUSED(ev);
ReplyAndDie(ctx, MakeError(E_REJECTED, "request cancelled"));
ReplyAndDie(ctx, MakeError(E_REJECTED, "tablet is shutting down"));
}

void TCollectGarbageActor::HandleError(NProto::TError error)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ void TCompactionActor::HandlePoisonPill(
const TActorContext& ctx)
{
Y_UNUSED(ev);
ReplyAndDie(ctx, MakeError(E_REJECTED, "request cancelled"));
ReplyAndDie(ctx, MakeError(E_REJECTED, "tablet is shutting down"));
}

void TCompactionActor::ReplyAndDie(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ void TDestroyCheckpointActor::HandlePoisonPill(
const TActorContext& ctx)
{
Y_UNUSED(ev);
ReplyAndDie(ctx, MakeError(E_REJECTED, "request cancelled"));
ReplyAndDie(ctx, MakeError(E_REJECTED, "tablet is shutting down"));
}

void TDestroyCheckpointActor::ReplyAndDie(
Expand Down
2 changes: 1 addition & 1 deletion cloud/filestore/libs/storage/tablet/tablet_actor_flush.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ void TFlushActor::HandlePoisonPill(
const TActorContext& ctx)
{
Y_UNUSED(ev);
ReplyAndDie(ctx, MakeError(E_REJECTED, "request cancelled"));
ReplyAndDie(ctx, MakeError(E_REJECTED, "tablet is shutting down"));
}

void TFlushActor::ReplyAndDie(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ void TFlushBytesActor::HandlePoisonPill(
const TActorContext& ctx)
{
Y_UNUSED(ev);
ReplyAndDie(ctx, MakeError(E_REJECTED, "request cancelled"));
ReplyAndDie(ctx, MakeError(E_REJECTED, "tablet is shutting down"));
}

void TFlushBytesActor::ReplyAndDie(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,7 @@ struct TIndexTabletMonitoringActor
const TActorContext& ctx)
{
Y_UNUSED(ev);
ReplyAndDie(ctx, MakeError(E_REJECTED, "request cancelled"), {});
ReplyAndDie(ctx, MakeError(E_REJECTED, "tablet is shutting down"), {});
}

void ReplyAndDie(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ void TReadBlobActor::HandlePoisonPill(
const TActorContext& ctx)
{
Y_UNUSED(ev);
ReplyAndDie(ctx, MakeError(E_REJECTED, "request cancelled"));
ReplyAndDie(ctx, MakeError(E_REJECTED, "tablet is shutting down"));
}

void TReadBlobActor::ReplyError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ void TReadDataActor::HandlePoisonPill(
const TActorContext& ctx)
{
Y_UNUSED(ev);
ReplyAndDie(ctx, MakeError(E_REJECTED, "request cancelled"));
ReplyAndDie(ctx, MakeError(E_REJECTED, "tablet is shutting down"));
}

void TReadDataActor::ReplyAndDie(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ void TWriteBatchActor::HandlePoisonPill(
const TActorContext& ctx)
{
Y_UNUSED(ev);
ReplyAndDie(ctx, MakeError(E_REJECTED, "request cancelled"));
ReplyAndDie(ctx, MakeError(E_REJECTED, "tablet is shutting down"));
}

void TWriteBatchActor::ReplyAndDie(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ void TWriteBlobActor::HandlePoisonPill(
const TActorContext& ctx)
{
Y_UNUSED(ev);
ReplyAndDie(ctx, MakeError(E_REJECTED, "request cancelled"));
ReplyAndDie(ctx, MakeError(E_REJECTED, "tablet is shutting down"));
}

void TWriteBlobActor::ReplyError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ void TWriteDataActor::HandlePoisonPill(
const TActorContext& ctx)
{
Y_UNUSED(ev);
ReplyAndDie(ctx, MakeError(E_REJECTED, "request cancelled"));
ReplyAndDie(ctx, MakeError(E_REJECTED, "tablet is shutting down"));
}

void TWriteDataActor::ReplyAndDie(
Expand Down
23 changes: 18 additions & 5 deletions cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3763,9 +3763,11 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data)
return false;
});

env.GetRuntime().DispatchEvents(TDispatchOptions(), TDuration::Seconds(1));

UNIT_ASSERT(putSeen);
TDispatchOptions options;
options.CustomFinalCondition = [&] {
return putSeen;
};
env.GetRuntime().DispatchEvents(options);

tablet.RebootTablet();

Expand All @@ -3774,6 +3776,10 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data)
E_REJECTED,
response->GetStatus(),
response->GetErrorReason());

UNIT_ASSERT_VALUES_EQUAL(
"tablet is shutting down",
response->GetErrorReason());
}

TABLET_TEST(ShouldCancelWriteRequestsIfTabletIsRebooted)
Expand Down Expand Up @@ -3826,9 +3832,12 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data)
return false;
});

env.GetRuntime().DispatchEvents(TDispatchOptions(), TDuration::Seconds(1));
TDispatchOptions options;
options.CustomFinalCondition = [&] {
return getSeen;
};

UNIT_ASSERT(getSeen);
env.GetRuntime().DispatchEvents(options);

failGet = false;
tablet.RebootTablet();
Expand All @@ -3838,6 +3847,10 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data)
E_REJECTED,
response->GetStatus(),
response->GetErrorReason());

UNIT_ASSERT_VALUES_EQUAL(
"tablet is shutting down",
response->GetErrorReason());
}

#undef TABLET_TEST
Expand Down