From 7654c9608627d166713c3eb3ddd87d342a22b83c Mon Sep 17 00:00:00 2001
From: Rowan Seymour
Date: Tue, 17 Dec 2024 12:11:28 -0500
Subject: [PATCH] Simplify metrics reporting

---
 backends/rapidpro/backend.go      | 32 +++++++++++--------------------
 backends/rapidpro/backend_test.go |  1 -
 server.go                         | 23 +++++++---------------
 3 files changed, 18 insertions(+), 38 deletions(-)

diff --git a/backends/rapidpro/backend.go b/backends/rapidpro/backend.go
index 5fecf1cc7..709d30abb 100644
--- a/backends/rapidpro/backend.go
+++ b/backends/rapidpro/backend.go
@@ -54,9 +54,7 @@ type stats struct {
 	// both sqlx and redis provide wait stats which are cummulative that we need to convert into increments by
 	// tracking their previous values
 	dbWaitDuration    time.Duration
-	dbWaitCount       int64
 	redisWaitDuration time.Duration
-	redisWaitCount    int64
 }
 
 type backend struct {
@@ -777,23 +775,18 @@ func (b *backend) Heartbeat() error {
 	redisStats := b.rp.Stats()
 
 	dbWaitDurationInPeriod := dbStats.WaitDuration - b.stats.dbWaitDuration
-	dbWaitCountInPeriod := dbStats.WaitCount - b.stats.dbWaitCount
 	redisWaitDurationInPeriod := redisStats.WaitDuration - b.stats.redisWaitDuration
-	redisWaitCountInPeriod := redisStats.WaitCount - b.stats.redisWaitCount
 
 	b.stats.dbWaitDuration = dbStats.WaitDuration
-	b.stats.dbWaitCount = dbStats.WaitCount
 	b.stats.redisWaitDuration = redisStats.WaitDuration
-	b.stats.redisWaitCount = redisStats.WaitCount
 
 	hostDim := cwatch.Dimension("Host", b.config.InstanceID)
-	appDim := cwatch.Dimension("App", "courier")
 
 	b.CloudWatch().Queue(
-		cwatch.Datum("DBConnectionsInUse", float64(dbStats.InUse), cwtypes.StandardUnitCount, hostDim, appDim),
-		cwatch.Datum("DBConnectionWaitDuration", float64(dbWaitDurationInPeriod/time.Millisecond), cwtypes.StandardUnitMilliseconds, hostDim, appDim),
-		cwatch.Datum("RedisConnectionsInUse", float64(redisStats.ActiveCount), cwtypes.StandardUnitCount, hostDim, appDim),
-		cwatch.Datum("RedisConnectionsWaitDuration", float64(redisWaitDurationInPeriod/time.Millisecond), cwtypes.StandardUnitMilliseconds, hostDim, appDim),
+		cwatch.Datum("DBConnectionsInUse", float64(dbStats.InUse), cwtypes.StandardUnitCount, hostDim),
+		cwatch.Datum("DBConnectionWaitDuration", float64(dbWaitDurationInPeriod/time.Millisecond), cwtypes.StandardUnitMilliseconds, hostDim),
+		cwatch.Datum("RedisConnectionsInUse", float64(redisStats.ActiveCount), cwtypes.StandardUnitCount, hostDim),
+		cwatch.Datum("RedisConnectionsWaitDuration", float64(redisWaitDurationInPeriod/time.Millisecond), cwtypes.StandardUnitMilliseconds, hostDim),
 	)
 
 	b.CloudWatch().Queue(
@@ -801,17 +794,14 @@ func (b *backend) Heartbeat() error {
 		cwatch.Datum("QueuedMsgs", float64(prioritySize), cwtypes.StandardUnitCount, cwatch.Dimension("QueueName", "priority")),
 	)
 
-	slog.Info("current metrics", "db_busy", dbStats.InUse,
-		"db_idle", dbStats.Idle,
-		"db_wait_time", dbWaitDurationInPeriod,
-		"db_wait_count", dbWaitCountInPeriod,
-		"redis_active", redisStats.ActiveCount,
-		"redis_idle", redisStats.IdleCount,
-		"redis_wait_time", redisWaitDurationInPeriod,
-		"redis_wait_count", redisWaitCountInPeriod,
+	slog.Info("current metrics",
+		"db_inuse", dbStats.InUse,
+		"db_wait", dbWaitDurationInPeriod,
+		"redis_inuse", redisStats.ActiveCount,
+		"redis_wait", redisWaitDurationInPeriod,
 		"priority_size", prioritySize,
-		"bulk_size", bulkSize)
-
+		"bulk_size", bulkSize,
+	)
 	return nil
 }
 
diff --git a/backends/rapidpro/backend_test.go b/backends/rapidpro/backend_test.go
index 9817196fa..49393ed01 100644
--- a/backends/rapidpro/backend_test.go
+++ b/backends/rapidpro/backend_test.go
@@ -767,7 +767,6 @@ func (ts *BackendTestSuite) TestHealth() {
 }
 
 func (ts *BackendTestSuite) TestHeartbeat() {
-	// TODO make metrics abstraction layer so we can test what we report
 	ts.NoError(ts.b.Heartbeat())
 }
 
diff --git a/server.go b/server.go
index ccf7b479d..556bae733 100644
--- a/server.go
+++ b/server.go
@@ -306,18 +306,15 @@ func (s *server) channelHandleWrapper(handler ChannelHandler, handlerFunc Channe
 		}
 
 		if channel != nil {
-			// if we have a channel but no events were created, we still log this to metrics
+			cw := s.Backend().CloudWatch()
 			channelTypeDim := cwatch.Dimension("ChannelType", string(channel.ChannelType()))
 
+			// if we have a channel but no events were created, we still log this to metrics
 			if len(events) == 0 {
 				if hErr != nil {
-					s.Backend().CloudWatch().Queue(
-						cwatch.Datum("ChannelError", float64(secondDuration), types.StandardUnitSeconds, channelTypeDim),
-					)
+					cw.Queue(cwatch.Datum("ChannelError", float64(secondDuration), types.StandardUnitSeconds, channelTypeDim))
 				} else {
-					s.Backend().CloudWatch().Queue(
-						cwatch.Datum("ChannelIgnored", float64(secondDuration), types.StandardUnitSeconds, channelTypeDim),
-					)
+					cw.Queue(cwatch.Datum("ChannelIgnored", float64(secondDuration), types.StandardUnitSeconds, channelTypeDim))
 				}
 			}
 
@@ -325,20 +322,14 @@ func (s *server) channelHandleWrapper(handler ChannelHandler, handlerFunc Channe
 				switch e := event.(type) {
 				case MsgIn:
 					clog.SetAttached(true)
-					s.Backend().CloudWatch().Queue(
-						cwatch.Datum("MsgReceive", float64(secondDuration), types.StandardUnitSeconds, channelTypeDim),
-					)
+					cw.Queue(cwatch.Datum("MsgReceive", float64(secondDuration), types.StandardUnitSeconds, channelTypeDim))
 					LogMsgReceived(r, e)
 				case StatusUpdate:
 					clog.SetAttached(true)
-					s.Backend().CloudWatch().Queue(
-						cwatch.Datum("MsgStatus", float64(secondDuration), types.StandardUnitSeconds, channelTypeDim),
-					)
+					cw.Queue(cwatch.Datum("MsgStatus", float64(secondDuration), types.StandardUnitSeconds, channelTypeDim))
 					LogMsgStatusReceived(r, e)
 				case ChannelEvent:
-					s.Backend().CloudWatch().Queue(
-						cwatch.Datum("EventReceive", float64(secondDuration), types.StandardUnitSeconds, channelTypeDim),
-					)
+					cw.Queue(cwatch.Datum("EventReceive", float64(secondDuration), types.StandardUnitSeconds, channelTypeDim))
 					LogChannelEventReceived(r, e)
 				}
 			}