From f71d3167bfed1ae9e577aa09bc55cda2b27b6f2c Mon Sep 17 00:00:00 2001 From: Tyler Yahn Date: Wed, 14 Apr 2021 12:09:08 -0700 Subject: [PATCH 1/4] Fix flaky OTLP exporter reconnect test The tests that check the OTLP exporter will reconnect wait for the reconnect loop to complete, in theory. However, they do not yield the active goroutine scheduling. The reconnect loop is in its own goroutine meaning it is unlikely for that loop to be re-entered, especially on slow systems. This updates the tests call runtime.Gosched when waiting for the reconnect loop and yield the scheduling to other goroutines. --- .../otlp/otlpgrpc/otlp_integration_test.go | 44 ++++++++++++++++--- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/exporters/otlp/otlpgrpc/otlp_integration_test.go b/exporters/otlp/otlpgrpc/otlp_integration_test.go index d0ee9fa6198..69a6a4ba8f1 100644 --- a/exporters/otlp/otlpgrpc/otlp_integration_test.go +++ b/exporters/otlp/otlpgrpc/otlp_integration_test.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "net" + "runtime" "strings" "testing" "time" @@ -144,7 +145,7 @@ func TestNewExporter_invokeStartThenStopManyTimes(t *testing.T) { func TestNewExporter_collectorConnectionDiesThenReconnectsWhenInRestMode(t *testing.T) { mc := runMockCollector(t) - reconnectionPeriod := 2 * time.Second // 2 second + jitter rest time after reconnection + reconnectionPeriod := 20 * time.Millisecond ctx := context.Background() exp := newGRPCExporter(t, ctx, mc.endpoint, otlpgrpc.WithReconnectionPeriod(reconnectionPeriod)) @@ -165,8 +166,19 @@ func TestNewExporter_collectorConnectionDiesThenReconnectsWhenInRestMode(t *test mc.endpoint, ) - // give it time for first reconnection - <-time.After(time.Millisecond * 20) + // Give the exporter sometime to reconnect + func() { + timer := time.NewTimer(reconnectionPeriod * 4) + defer timer.Stop() + for { + select { + case <-timer.C: + return + default: + runtime.Gosched() + } + } + }() // second export, it will detect connection issue, change state of exporter to disconnected and // send message to disconnected channel but this time reconnection gouroutine will be in (rest mode, not listening to the disconnected channel) @@ -184,7 +196,18 @@ func TestNewExporter_collectorConnectionDiesThenReconnectsWhenInRestMode(t *test // make sure reconnection loop hits beginning and goes back to waiting mode // after hitting beginning of the loop it should reconnect - <-time.After(time.Second * 4) + func() { + timer := time.NewTimer(reconnectionPeriod * 4) + defer timer.Stop() + for { + select { + case <-timer.C: + return + default: + runtime.Gosched() + } + } + }() n := 10 for i := 0; i < n; i++ { @@ -240,7 +263,18 @@ func TestNewExporter_collectorConnectionDiesThenReconnects(t *testing.T) { nmc := runMockCollectorAtEndpoint(t, mc.endpoint) // Give the exporter sometime to reconnect - <-time.After(reconnectionPeriod * 4) + func() { + timer := time.NewTimer(reconnectionPeriod * 4) + defer timer.Stop() + for { + select { + case <-timer.C: + return + default: + runtime.Gosched() + } + } + }() n := 10 for i := 0; i < n; i++ { From 2d5e814cd338ff23572b10cedd9ed1276fe3cad3 Mon Sep 17 00:00:00 2001 From: Tyler Yahn Date: Wed, 14 Apr 2021 12:18:57 -0700 Subject: [PATCH 2/4] Add changes to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 29a7a7fb560..f545c673afe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm Additionally, this tag is overridden, as specified in the OTel specification, if the event contains an attribute with that key. (#1768) - Zipkin Exporter: Ensure mapping between OTel and Zipkin span data complies with the specification. (#1688) - Fixed typo for default service name in Jaeger Exporter. (#1797) +- Fixed OTLP gRPC export connection reconnect testing bug. (#1527, #1814) ### Changed From c027ea9afb31f0a57094bc6dc55b272c77c2f870 Mon Sep 17 00:00:00 2001 From: Tyler Yahn Date: Wed, 14 Apr 2021 12:26:23 -0700 Subject: [PATCH 3/4] Use time.After instead of Timer --- exporters/otlp/otlpgrpc/otlp_integration_test.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/exporters/otlp/otlpgrpc/otlp_integration_test.go b/exporters/otlp/otlpgrpc/otlp_integration_test.go index 69a6a4ba8f1..b89bbc0be61 100644 --- a/exporters/otlp/otlpgrpc/otlp_integration_test.go +++ b/exporters/otlp/otlpgrpc/otlp_integration_test.go @@ -168,11 +168,10 @@ func TestNewExporter_collectorConnectionDiesThenReconnectsWhenInRestMode(t *test // Give the exporter sometime to reconnect func() { - timer := time.NewTimer(reconnectionPeriod * 4) - defer timer.Stop() + timer := time.After(reconnectionPeriod * 10) for { select { - case <-timer.C: + case <-timer: return default: runtime.Gosched() @@ -197,11 +196,10 @@ func TestNewExporter_collectorConnectionDiesThenReconnectsWhenInRestMode(t *test // make sure reconnection loop hits beginning and goes back to waiting mode // after hitting beginning of the loop it should reconnect func() { - timer := time.NewTimer(reconnectionPeriod * 4) - defer timer.Stop() + timer := time.After(reconnectionPeriod * 10) for { select { - case <-timer.C: + case <-timer: return default: runtime.Gosched() @@ -264,11 +262,10 @@ func TestNewExporter_collectorConnectionDiesThenReconnects(t *testing.T) { // Give the exporter sometime to reconnect func() { - timer := time.NewTimer(reconnectionPeriod * 4) - defer timer.Stop() + timer := time.After(reconnectionPeriod * 10) for { select { - case <-timer.C: + case <-timer: return default: runtime.Gosched() From 7cb51a19cb9351cd4960ec3105af4cd51d5f21bf Mon Sep 17 00:00:00 2001 From: Tyler Yahn Date: Wed, 14 Apr 2021 12:48:45 -0700 Subject: [PATCH 4/4] Remove changelog entry --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f545c673afe..29a7a7fb560 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,7 +47,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm Additionally, this tag is overridden, as specified in the OTel specification, if the event contains an attribute with that key. (#1768) - Zipkin Exporter: Ensure mapping between OTel and Zipkin span data complies with the specification. (#1688) - Fixed typo for default service name in Jaeger Exporter. (#1797) -- Fixed OTLP gRPC export connection reconnect testing bug. (#1527, #1814) ### Changed