diff --git a/client/clientFactory_mock.go b/client/clientFactory_mock.go index 84c91e7d108..01d1ed1b647 100644 --- a/client/clientFactory_mock.go +++ b/client/clientFactory_mock.go @@ -194,15 +194,15 @@ func (m *MockFactoryProvider) EXPECT() *MockFactoryProviderMockRecorder { } // NewFactory mocks base method. -func (m *MockFactoryProvider) NewFactory(rpcFactory common.RPCFactory, monitor membership.Monitor, metricsClient metrics.Client, dc *dynamicconfig.Collection, numberOfHistoryShards int32, logger log.Logger) Factory { +func (m *MockFactoryProvider) NewFactory(rpcFactory common.RPCFactory, monitor membership.Monitor, metricsClient metrics.Client, dc *dynamicconfig.Collection, numberOfHistoryShards int32, logger, throttledLogger log.Logger) Factory { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "NewFactory", rpcFactory, monitor, metricsClient, dc, numberOfHistoryShards, logger) + ret := m.ctrl.Call(m, "NewFactory", rpcFactory, monitor, metricsClient, dc, numberOfHistoryShards, logger, throttledLogger) ret0, _ := ret[0].(Factory) return ret0 } // NewFactory indicates an expected call of NewFactory. -func (mr *MockFactoryProviderMockRecorder) NewFactory(rpcFactory, monitor, metricsClient, dc, numberOfHistoryShards, logger interface{}) *gomock.Call { +func (mr *MockFactoryProviderMockRecorder) NewFactory(rpcFactory, monitor, metricsClient, dc, numberOfHistoryShards, logger, throttledLogger interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "NewFactory", reflect.TypeOf((*MockFactoryProvider)(nil).NewFactory), rpcFactory, monitor, metricsClient, dc, numberOfHistoryShards, logger) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "NewFactory", reflect.TypeOf((*MockFactoryProvider)(nil).NewFactory), rpcFactory, monitor, metricsClient, dc, numberOfHistoryShards, logger, throttledLogger) } diff --git a/client/clientfactory.go b/client/clientfactory.go index d2e949a2973..3766b1f904b 100644 --- a/client/clientfactory.go +++ b/client/clientfactory.go @@ -72,6 +72,7 @@ type ( dc *dynamicconfig.Collection, numberOfHistoryShards int32, logger log.Logger, + throttledLogger log.Logger, ) Factory } @@ -85,6 +86,7 @@ type ( dynConfig *dynamicconfig.Collection numberOfHistoryShards int32 logger log.Logger + throttledLogger log.Logger } factoryProviderImpl struct { @@ -111,6 +113,7 @@ func (p *factoryProviderImpl) NewFactory( dc *dynamicconfig.Collection, numberOfHistoryShards int32, logger log.Logger, + throttledLogger log.Logger, ) Factory { return &rpcClientFactory{ rpcFactory: rpcFactory, @@ -119,6 +122,7 @@ func (p *factoryProviderImpl) NewFactory( dynConfig: dc, numberOfHistoryShards: numberOfHistoryShards, logger: logger, + throttledLogger: throttledLogger, } } @@ -148,7 +152,7 @@ func (cf *rpcClientFactory) NewHistoryClientWithTimeout(timeout time.Duration) ( clientCache := common.NewClientCache(keyResolver, clientProvider) client := history.NewClient(cf.numberOfHistoryShards, timeout, clientCache, cf.logger) if cf.metricsClient != nil { - client = history.NewMetricClient(client, cf.metricsClient, cf.logger) + client = history.NewMetricClient(client, cf.metricsClient, cf.logger, cf.throttledLogger) } return client, nil } @@ -176,7 +180,7 @@ func (cf *rpcClientFactory) NewMatchingClientWithTimeout( ) if cf.metricsClient != nil { - client = matching.NewMetricClient(client, cf.metricsClient, cf.logger) + client = matching.NewMetricClient(client, cf.metricsClient, cf.logger, cf.throttledLogger) } return client, nil diff --git a/client/history/metricClient.go b/client/history/metricClient.go index b435403f0db..bcfa663f230 100644 --- a/client/history/metricClient.go +++ b/client/history/metricClient.go @@ -38,9 +38,10 @@ import ( var _ historyservice.HistoryServiceClient = (*metricClient)(nil) type metricClient struct { - client historyservice.HistoryServiceClient - metricsClient metrics.Client - logger log.Logger + client historyservice.HistoryServiceClient + metricsClient metrics.Client + logger log.Logger + throttledLogger log.Logger } // NewMetricClient creates a new instance of historyservice.HistoryServiceClient that emits metrics @@ -48,11 +49,13 @@ func NewMetricClient( client historyservice.HistoryServiceClient, metricsClient metrics.Client, logger log.Logger, + throttledLogger log.Logger, ) historyservice.HistoryServiceClient { return &metricClient{ - client: client, - metricsClient: metricsClient, - logger: logger, + client: client, + metricsClient: metricsClient, + logger: logger, + throttledLogger: throttledLogger, } } @@ -614,8 +617,8 @@ func (c *metricClient) finishMetricsRecording( err error, ) { if err != nil { - c.logger.Error("history client encountered error", tag.Error(err)) - scope.IncCounter(metrics.ClientFailures) + c.throttledLogger.Error("history client encountered error", tag.Error(err)) + scope.Tagged(metrics.ServiceErrorTypeTag(err)).IncCounter(metrics.ClientFailures) } stopwatch.Stop() } diff --git a/client/matching/metricClient.go b/client/matching/metricClient.go index 29a067e3414..7b3964570d6 100644 --- a/client/matching/metricClient.go +++ b/client/matching/metricClient.go @@ -40,9 +40,10 @@ import ( var _ matchingservice.MatchingServiceClient = (*metricClient)(nil) type metricClient struct { - client matchingservice.MatchingServiceClient - metricsClient metrics.Client - logger log.Logger + client matchingservice.MatchingServiceClient + metricsClient metrics.Client + logger log.Logger + throttledLogger log.Logger } // NewMetricClient creates a new instance of matchingservice.MatchingServiceClient that emits metrics @@ -50,11 +51,13 @@ func NewMetricClient( client matchingservice.MatchingServiceClient, metricsClient metrics.Client, logger log.Logger, + throttledLogger log.Logger, ) matchingservice.MatchingServiceClient { return &metricClient{ - client: client, - metricsClient: metricsClient, - logger: logger, + client: client, + metricsClient: metricsClient, + logger: logger, + throttledLogger: throttledLogger, } } @@ -253,8 +256,8 @@ func (c *metricClient) finishMetricsRecording( err error, ) { if err != nil { - c.logger.Error("matching client encountered error", tag.Error(err)) - scope.IncCounter(metrics.ClientFailures) + c.throttledLogger.Error("matching client encountered error", tag.Error(err)) + scope.Tagged(metrics.ServiceErrorTypeTag(err)).IncCounter(metrics.ClientFailures) } stopwatch.Stop() } diff --git a/common/metrics/defs.go b/common/metrics/defs.go index 8a756c9fdcb..b59921d8994 100644 --- a/common/metrics/defs.go +++ b/common/metrics/defs.go @@ -100,6 +100,7 @@ const ( TaskTypeTagName = "task_type" QueueTypeTagName = "queue_type" visibilityTypeTagName = "visibility_type" + ErrorTypeTagName = "error_type" httpStatusTagName = "http_status" resourceExhaustedTag = "resource_exhausted_cause" ) @@ -1716,6 +1717,23 @@ var ScopeDefs = map[ServiceIdx]map[int]scopeDefinition{ }, } +// Common error type +const ( + ErrorTypeUnknown = "unknown_error_type" + ErrorTypeInvalidArgument = "invalid_argument_error_type" + ErrorTypeInternal = "internal_error_type" + ErrorTypeUnavailable = "unavailable_error_type" + ErrorTypeCanceled = "canceled_error_type" + ErrorTypeTimedOut = "timed_out_error_type" + ErrorTypeNotFound = "not_found_error_type" + ErrorTypeNamespaceNotActive = "namespace_not_active_error_type" + ErrorTypeQueryFailed = "query_failed_error_type" + ErrorTypeClientVersionNotSupported = "client_version_not_supported_error_type" + ErrorTypeServerVersionNotSupported = "server_version_not_supported_error_type" + ErrorTypePermissionDenied = "permission_denied_error_type" + ErrorTypeResourceExhausted = "resource_exhausted_error_type" +) + // Common Metrics enum const ( ServiceRequests = iota diff --git a/common/metrics/defs_test.go b/common/metrics/defs_test.go index 1c3fbe6b2dc..d0c296e3800 100644 --- a/common/metrics/defs_test.go +++ b/common/metrics/defs_test.go @@ -29,6 +29,9 @@ import ( "regexp" "testing" + "go.temporal.io/api/enums/v1" + "go.temporal.io/api/serviceerror" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -109,3 +112,32 @@ func TestMetricDefs(t *testing.T) { } } } + +func TestGetServiceErrorType(t *testing.T) { + testData := []struct { + err error + expectedResult string + }{ + {serviceerror.NewInvalidArgument(""), ErrorTypeInvalidArgument}, + {serviceerror.NewCanceled(""), ErrorTypeCanceled}, + {serviceerror.NewDataLoss(""), ErrorTypeInternal}, + {serviceerror.NewInternal(""), ErrorTypeInternal}, + {serviceerror.NewCancellationAlreadyRequested(""), ErrorTypeInvalidArgument}, + {serviceerror.NewNamespaceAlreadyExists(""), ErrorTypeInvalidArgument}, + {serviceerror.NewWorkflowExecutionAlreadyStarted("", "", ""), ErrorTypeInvalidArgument}, + {serviceerror.NewClientVersionNotSupported("", "", ""), ErrorTypeClientVersionNotSupported}, + {serviceerror.NewServerVersionNotSupported("", ""), ErrorTypeServerVersionNotSupported}, + {serviceerror.NewDeadlineExceeded(""), ErrorTypeTimedOut}, + {serviceerror.NewNamespaceNotActive("", "", ""), ErrorTypeNamespaceNotActive}, + {serviceerror.NewNotFound(""), ErrorTypeNotFound}, + {serviceerror.NewPermissionDenied("", ""), ErrorTypePermissionDenied}, + {serviceerror.NewQueryFailed(""), ErrorTypeQueryFailed}, + {serviceerror.NewResourceExhausted(enums.RESOURCE_EXHAUSTED_CAUSE_UNSPECIFIED, ""), ErrorTypeResourceExhausted}, + {serviceerror.NewUnavailable(""), ErrorTypeUnavailable}, + {serviceerror.NewUnimplemented(""), ErrorTypeUnknown}, + } + + for id, data := range testData { + assert.Equal(t, data.expectedResult, getErrorType(data.err), "Unexpected error type in index", id) + } +} diff --git a/common/metrics/tags.go b/common/metrics/tags.go index 30ad369093f..827aecdf4c9 100644 --- a/common/metrics/tags.go +++ b/common/metrics/tags.go @@ -27,6 +27,8 @@ package metrics import ( "strconv" + "go.temporal.io/api/serviceerror" + enumspb "go.temporal.io/api/enums/v1" ) @@ -199,6 +201,10 @@ func VisibilityTypeTag(value string) Tag { return &tagImpl{key: visibilityTypeTagName, value: value} } +func ServiceErrorTypeTag(err error) Tag { + return &tagImpl{key: ErrorTypeTagName, value: getErrorType(err)} +} + var standardVisibilityTypeTag = VisibilityTypeTag(standardVisibilityTagValue) var advancedVisibilityTypeTag = VisibilityTypeTag(advancedVisibilityTagValue) @@ -218,3 +224,37 @@ func HttpStatusTag(value int) Tag { func ResourceExhaustedCauseTag(cause enumspb.ResourceExhaustedCause) Tag { return &tagImpl{key: resourceExhaustedTag, value: cause.String()} } + +func getErrorType(err error) string { + switch err.(type) { + case *serviceerror.InvalidArgument, + *serviceerror.CancellationAlreadyRequested, + *serviceerror.NamespaceAlreadyExists, + *serviceerror.WorkflowExecutionAlreadyStarted: + return ErrorTypeInvalidArgument + case *serviceerror.Internal, *serviceerror.DataLoss: + return ErrorTypeInternal + case *serviceerror.Unavailable: + return ErrorTypeUnavailable + case *serviceerror.NotFound: + return ErrorTypeNotFound + case *serviceerror.Canceled: + return ErrorTypeCanceled + case *serviceerror.DeadlineExceeded: + return ErrorTypeTimedOut + case *serviceerror.NamespaceNotActive: + return ErrorTypeNamespaceNotActive + case *serviceerror.QueryFailed: + return ErrorTypeQueryFailed + case *serviceerror.ClientVersionNotSupported: + return ErrorTypeClientVersionNotSupported + case *serviceerror.ServerVersionNotSupported: + return ErrorTypeServerVersionNotSupported + case *serviceerror.PermissionDenied: + return ErrorTypePermissionDenied + case *serviceerror.ResourceExhausted: + return ErrorTypeResourceExhausted + default: + return ErrorTypeUnknown + } +} diff --git a/common/resource/fx.go b/common/resource/fx.go index 6fbe6560d7e..f15d28a9258 100644 --- a/common/resource/fx.go +++ b/common/resource/fx.go @@ -208,6 +208,7 @@ func ClientFactoryProvider( dynamicCollection *dynamicconfig.Collection, persistenceConfig *config.Persistence, logger SnTaggedLogger, + throttledLogger ThrottledLogger, ) client.Factory { return factoryProvider.NewFactory( rpcFactory, @@ -216,6 +217,7 @@ func ClientFactoryProvider( dynamicCollection, persistenceConfig.NumHistoryShards, logger, + throttledLogger, ) }