From 8ea69ec3f86b36d3e6d2dbc60b7b7fb02e4b271f Mon Sep 17 00:00:00 2001 From: Paul Rogers Date: Thu, 9 May 2024 11:04:51 -0400 Subject: [PATCH 1/5] First pass at unregistering metrics so that we can run multiple tests --- .../deletion/delete_requests_client_test.go | 1 + pkg/compactor/deletion/metrics.go | 5 +++++ pkg/loki/loki_test.go | 21 +++++++++++++++++++ pkg/pattern/flush_test.go | 2 +- pkg/scheduler/scheduler_test.go | 2 +- .../client/congestion/congestion_test.go | 16 ++++++++++---- .../client/congestion/controller_test.go | 5 +++++ .../chunk/client/congestion/metrics.go | 5 +++++ tools/tsdb/migrate-versions/main_test.go | 1 + 9 files changed, 52 insertions(+), 6 deletions(-) diff --git a/pkg/compactor/deletion/delete_requests_client_test.go b/pkg/compactor/deletion/delete_requests_client_test.go index 226891471196..248b75aafe67 100644 --- a/pkg/compactor/deletion/delete_requests_client_test.go +++ b/pkg/compactor/deletion/delete_requests_client_test.go @@ -74,6 +74,7 @@ func TestGetCacheGenNumberForUser(t *testing.T) { client.Stop() }) + deleteClientMetrics.Unregister() } type mockCompactorClient struct { diff --git a/pkg/compactor/deletion/metrics.go b/pkg/compactor/deletion/metrics.go index 9d89f46c88d9..c6477062167d 100644 --- a/pkg/compactor/deletion/metrics.go +++ b/pkg/compactor/deletion/metrics.go @@ -12,6 +12,11 @@ type DeleteRequestClientMetrics struct { deleteRequestsLookupsFailedTotal prometheus.Counter } +func (m DeleteRequestClientMetrics) Unregister() { + prometheus.Unregister(m.deleteRequestsLookupsTotal) + prometheus.Unregister(m.deleteRequestsLookupsFailedTotal) +} + func NewDeleteRequestClientMetrics(r prometheus.Registerer) *DeleteRequestClientMetrics { m := DeleteRequestClientMetrics{} diff --git a/pkg/loki/loki_test.go b/pkg/loki/loki_test.go index a4e6ff73ca56..1691811ee736 100644 --- a/pkg/loki/loki_test.go +++ b/pkg/loki/loki_test.go @@ -4,6 +4,10 @@ import ( "bytes" "flag" "fmt" + "github.com/grafana/loki/v3/pkg/util/constants" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" + "github.com/prometheus/client_golang/prometheus/collectors/version" "io" "net" "net/http" @@ -251,4 +255,21 @@ schema_config: require.NoError(t, err) require.Equal(t, string(bBytes), "abc") assert.True(t, customHandlerInvoked) + unregisterLokiMetrics(loki) +} + +func unregisterLokiMetrics(loki *Loki) { + loki.ClientMetrics.Unregister() + loki.deleteClientMetrics.Unregister() + prometheus.Unregister(version.NewCollector(constants.Loki)) + prometheus.Unregister(collectors.NewGoCollector( + collectors.WithGoCollectorRuntimeMetrics(collectors.MetricsAll), + )) + //TODO Update DSKit to have a method to unregister these metrics + prometheus.Unregister(loki.Metrics.TCPConnections) + prometheus.Unregister(loki.Metrics.TCPConnectionsLimit) + prometheus.Unregister(loki.Metrics.RequestDuration) + prometheus.Unregister(loki.Metrics.ReceivedMessageSize) + prometheus.Unregister(loki.Metrics.SentMessageSize) + prometheus.Unregister(loki.Metrics.InflightRequests) } diff --git a/pkg/pattern/flush_test.go b/pkg/pattern/flush_test.go index 4d70eea5c3e1..fb30a96aa671 100644 --- a/pkg/pattern/flush_test.go +++ b/pkg/pattern/flush_test.go @@ -23,7 +23,7 @@ import ( ) func TestSweepInstance(t *testing.T) { - ing, err := New(defaultIngesterTestConfig(t), "foo", prometheus.DefaultRegisterer, log.NewNopLogger()) + ing, err := New(defaultIngesterTestConfig(t), "foo", prometheus.NewRegistry(), log.NewNopLogger()) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), ing) //nolint:errcheck err = services.StartAndAwaitRunning(context.Background(), ing) diff --git a/pkg/scheduler/scheduler_test.go b/pkg/scheduler/scheduler_test.go index 7f8d88e4d679..c9f569c5a588 100644 --- a/pkg/scheduler/scheduler_test.go +++ b/pkg/scheduler/scheduler_test.go @@ -27,7 +27,7 @@ func TestScheduler_setRunState(t *testing.T) { // we make a Scheduler with the things required to avoid nil pointers s := Scheduler{ log: util_log.Logger, - schedulerRunning: promauto.With(prometheus.DefaultRegisterer).NewGauge(prometheus.GaugeOpts{ + schedulerRunning: promauto.With(prometheus.NewRegistry()).NewGauge(prometheus.GaugeOpts{ Name: "cortex_query_scheduler_running", Help: "Value will be 1 if the scheduler is in the ReplicationSet and actively receiving/processing requests", }), diff --git a/pkg/storage/chunk/client/congestion/congestion_test.go b/pkg/storage/chunk/client/congestion/congestion_test.go index c9c9b0d39809..4f86c888d54a 100644 --- a/pkg/storage/chunk/client/congestion/congestion_test.go +++ b/pkg/storage/chunk/client/congestion/congestion_test.go @@ -9,11 +9,13 @@ import ( func TestZeroValueConstruction(t *testing.T) { cfg := Config{} - ctrl := NewController(cfg, log.NewNopLogger(), NewMetrics(t.Name(), cfg)) + m := NewMetrics(t.Name(), cfg) + ctrl := NewController(cfg, log.NewNopLogger(), m) require.IsType(t, &NoopController{}, ctrl) require.IsType(t, &NoopRetrier{}, ctrl.getRetrier()) require.IsType(t, &NoopHedger{}, ctrl.getHedger()) + m.Unregister() } func TestAIMDConstruction(t *testing.T) { @@ -22,11 +24,13 @@ func TestAIMDConstruction(t *testing.T) { Strategy: "aimd", }, } - ctrl := NewController(cfg, log.NewNopLogger(), NewMetrics(t.Name(), cfg)) + m := NewMetrics(t.Name(), cfg) + ctrl := NewController(cfg, log.NewNopLogger(), m) require.IsType(t, &AIMDController{}, ctrl) require.IsType(t, &NoopRetrier{}, ctrl.getRetrier()) require.IsType(t, &NoopHedger{}, ctrl.getHedger()) + m.Unregister() } func TestRetrierConstruction(t *testing.T) { @@ -35,11 +39,13 @@ func TestRetrierConstruction(t *testing.T) { Strategy: "limited", }, } - ctrl := NewController(cfg, log.NewNopLogger(), NewMetrics(t.Name(), cfg)) + m := NewMetrics(t.Name(), cfg) + ctrl := NewController(cfg, log.NewNopLogger(), m) require.IsType(t, &NoopController{}, ctrl) require.IsType(t, &LimitedRetrier{}, ctrl.getRetrier()) require.IsType(t, &NoopHedger{}, ctrl.getHedger()) + m.Unregister() } func TestCombinedConstruction(t *testing.T) { @@ -51,11 +57,13 @@ func TestCombinedConstruction(t *testing.T) { Strategy: "limited", }, } - ctrl := NewController(cfg, log.NewNopLogger(), NewMetrics(t.Name(), cfg)) + m := NewMetrics(t.Name(), cfg) + ctrl := NewController(cfg, log.NewNopLogger(), m) require.IsType(t, &AIMDController{}, ctrl) require.IsType(t, &LimitedRetrier{}, ctrl.getRetrier()) require.IsType(t, &NoopHedger{}, ctrl.getHedger()) + m.Unregister() } func TestHedgerConstruction(t *testing.T) { diff --git a/pkg/storage/chunk/client/congestion/controller_test.go b/pkg/storage/chunk/client/congestion/controller_test.go index 74620d334ff9..c280b2214718 100644 --- a/pkg/storage/chunk/client/congestion/controller_test.go +++ b/pkg/storage/chunk/client/congestion/controller_test.go @@ -46,6 +46,7 @@ func TestRequestNoopRetry(t *testing.T) { require.EqualValues(t, 2, testutil.ToFloat64(metrics.requests)) require.EqualValues(t, 0, testutil.ToFloat64(metrics.retries)) + metrics.Unregister() } func TestRequestZeroLimitedRetry(t *testing.T) { @@ -74,6 +75,7 @@ func TestRequestZeroLimitedRetry(t *testing.T) { require.EqualValues(t, 1, testutil.ToFloat64(metrics.requests)) require.EqualValues(t, 0, testutil.ToFloat64(metrics.retries)) + metrics.Unregister() } func TestRequestLimitedRetry(t *testing.T) { @@ -109,6 +111,7 @@ func TestRequestLimitedRetry(t *testing.T) { require.EqualValues(t, 1, testutil.ToFloat64(metrics.retriesExceeded)) require.EqualValues(t, 2, testutil.ToFloat64(metrics.retries)) require.EqualValues(t, 4, testutil.ToFloat64(metrics.requests)) + metrics.Unregister() } func TestRequestLimitedRetryNonRetryableErr(t *testing.T) { @@ -139,6 +142,7 @@ func TestRequestLimitedRetryNonRetryableErr(t *testing.T) { require.EqualValues(t, 0, testutil.ToFloat64(metrics.retries)) require.EqualValues(t, 1, testutil.ToFloat64(metrics.nonRetryableErrors)) require.EqualValues(t, 1, testutil.ToFloat64(metrics.requests)) + metrics.Unregister() } func TestAIMDReducedThroughput(t *testing.T) { @@ -212,6 +216,7 @@ func TestAIMDReducedThroughput(t *testing.T) { // should have registered some congestion latency in stats require.NotZero(t, statsCtx.Store().CongestionControlLatency) + metrics.Unregister() } func runAndMeasureRate(ctx context.Context, ctrl Controller, duration time.Duration) (float64, float64) { diff --git a/pkg/storage/chunk/client/congestion/metrics.go b/pkg/storage/chunk/client/congestion/metrics.go index 78684a4e4089..2e41d5e0bd5e 100644 --- a/pkg/storage/chunk/client/congestion/metrics.go +++ b/pkg/storage/chunk/client/congestion/metrics.go @@ -17,6 +17,11 @@ type Metrics struct { func (m Metrics) Unregister() { prometheus.Unregister(m.currentLimit) + prometheus.Unregister(m.backoffSec) + prometheus.Unregister(m.requests) + prometheus.Unregister(m.retries) + prometheus.Unregister(m.nonRetryableErrors) + prometheus.Unregister(m.retriesExceeded) } // NewMetrics creates metrics to be used for monitoring congestion control. diff --git a/tools/tsdb/migrate-versions/main_test.go b/tools/tsdb/migrate-versions/main_test.go index 7ac68521545b..62519e04f61f 100644 --- a/tools/tsdb/migrate-versions/main_test.go +++ b/tools/tsdb/migrate-versions/main_test.go @@ -141,4 +141,5 @@ func TestMigrateTables(t *testing.T) { } }) } + clientMetrics.Unregister() } From 5928827a00944c5a010f070d8acd5cb696f1b52b Mon Sep 17 00:00:00 2001 From: Paul Rogers Date: Thu, 9 May 2024 11:12:44 -0400 Subject: [PATCH 2/5] Lint --- pkg/loki/loki_test.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/loki/loki_test.go b/pkg/loki/loki_test.go index 1691811ee736..3a8fd8d899ca 100644 --- a/pkg/loki/loki_test.go +++ b/pkg/loki/loki_test.go @@ -4,10 +4,6 @@ import ( "bytes" "flag" "fmt" - "github.com/grafana/loki/v3/pkg/util/constants" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/collectors" - "github.com/prometheus/client_golang/prometheus/collectors/version" "io" "net" "net/http" @@ -15,6 +11,11 @@ import ( "testing" "time" + "github.com/grafana/loki/v3/pkg/util/constants" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" + "github.com/prometheus/client_golang/prometheus/collectors/version" + "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/server" "github.com/stretchr/testify/assert" From 940c07d4ac5a4c84849fbf4b68f4302b4300ccda Mon Sep 17 00:00:00 2001 From: Paul Rogers Date: Thu, 9 May 2024 11:15:48 -0400 Subject: [PATCH 3/5] Lint --- pkg/loki/loki_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/loki/loki_test.go b/pkg/loki/loki_test.go index 3a8fd8d899ca..b29d2aad2206 100644 --- a/pkg/loki/loki_test.go +++ b/pkg/loki/loki_test.go @@ -11,11 +11,12 @@ import ( "testing" "time" - "github.com/grafana/loki/v3/pkg/util/constants" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/collectors/version" + "github.com/grafana/loki/v3/pkg/util/constants" + "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/server" "github.com/stretchr/testify/assert" From 26b522976cafbf14bf1c558b86e63332c56ebd41 Mon Sep 17 00:00:00 2001 From: Paul Rogers Date: Thu, 9 May 2024 11:27:42 -0400 Subject: [PATCH 4/5] Use nil and let prometheus do the right thing --- pkg/compactor/deletion/delete_requests_client_test.go | 4 +--- pkg/pattern/flush_test.go | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pkg/compactor/deletion/delete_requests_client_test.go b/pkg/compactor/deletion/delete_requests_client_test.go index 248b75aafe67..299b4661cc94 100644 --- a/pkg/compactor/deletion/delete_requests_client_test.go +++ b/pkg/compactor/deletion/delete_requests_client_test.go @@ -7,12 +7,11 @@ import ( "testing" "time" - "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" ) func TestGetCacheGenNumberForUser(t *testing.T) { - deleteClientMetrics := NewDeleteRequestClientMetrics(prometheus.DefaultRegisterer) + deleteClientMetrics := NewDeleteRequestClientMetrics(nil) t.Run("it requests results from the compactor client", func(t *testing.T) { compactorClient := mockCompactorClient{ @@ -74,7 +73,6 @@ func TestGetCacheGenNumberForUser(t *testing.T) { client.Stop() }) - deleteClientMetrics.Unregister() } type mockCompactorClient struct { diff --git a/pkg/pattern/flush_test.go b/pkg/pattern/flush_test.go index fb30a96aa671..9ee4bd436992 100644 --- a/pkg/pattern/flush_test.go +++ b/pkg/pattern/flush_test.go @@ -12,7 +12,6 @@ import ( "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/grafana/dskit/user" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/require" @@ -23,7 +22,7 @@ import ( ) func TestSweepInstance(t *testing.T) { - ing, err := New(defaultIngesterTestConfig(t), "foo", prometheus.NewRegistry(), log.NewNopLogger()) + ing, err := New(defaultIngesterTestConfig(t), "foo", nil, log.NewNopLogger()) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), ing) //nolint:errcheck err = services.StartAndAwaitRunning(context.Background(), ing) From 764629fe36d6bf5524892d266288eadad16b6cef Mon Sep 17 00:00:00 2001 From: Paul Rogers Date: Thu, 9 May 2024 11:29:00 -0400 Subject: [PATCH 5/5] Use nil and let prometheus do the right thing --- pkg/scheduler/scheduler_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/scheduler/scheduler_test.go b/pkg/scheduler/scheduler_test.go index c9f569c5a588..939aa2b18bb9 100644 --- a/pkg/scheduler/scheduler_test.go +++ b/pkg/scheduler/scheduler_test.go @@ -27,7 +27,7 @@ func TestScheduler_setRunState(t *testing.T) { // we make a Scheduler with the things required to avoid nil pointers s := Scheduler{ log: util_log.Logger, - schedulerRunning: promauto.With(prometheus.NewRegistry()).NewGauge(prometheus.GaugeOpts{ + schedulerRunning: promauto.With(nil).NewGauge(prometheus.GaugeOpts{ Name: "cortex_query_scheduler_running", Help: "Value will be 1 if the scheduler is in the ReplicationSet and actively receiving/processing requests", }),