From 1b29c66b1980bf4431fc4ea63e35d2f124c6ec1d Mon Sep 17 00:00:00 2001 From: Nick Ripley Date: Mon, 16 May 2022 09:56:18 -0400 Subject: [PATCH] profiler: fix TestStopLatency The timeout for the test was not realistic given the constraints of the Go runtime CPU profiler. Increase the timeout and add a second test case meant to exercise the latency of both stopping the profiling as well as stopping the profile upload. Fixes #1294 --- profiler/profiler_test.go | 86 +++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/profiler/profiler_test.go b/profiler/profiler_test.go index 3686068095..9b6b9c5c1e 100644 --- a/profiler/profiler_test.go +++ b/profiler/profiler_test.go @@ -184,48 +184,56 @@ func TestStartStopIdempotency(t *testing.T) { // TestStopLatency tries to make sure that calling Stop() doesn't hang, i.e. // that ongoing profiling or upload operations are immediately canceled. func TestStopLatency(t *testing.T) { - t.Skip("broken test, see issue #1294") - p, err := newProfiler( - WithURL("http://invalid.invalid/"), - WithPeriod(1000*time.Millisecond), - CPUDuration(500*time.Millisecond), - ) - require.NoError(t, err) - uploadStart := make(chan struct{}, 1) - uploadFunc := p.uploadFunc - p.uploadFunc = func(b batch) error { - select { - case uploadStart <- struct{}{}: - default: - // uploadFunc may be called more than once, don't leak this goroutine - } - return uploadFunc(b) - } - p.run() + t.Run("stop-profiles", func(t *testing.T) { + Start( + WithPeriod(time.Hour), + CPUDuration(time.Hour), + ) - <-uploadStart - // Wait for uploadFunc(b) to run. A bit racy, but worst case is the test - // passing for the wrong reasons. - time.Sleep(10 * time.Millisecond) + // give profiling time to start + time.Sleep(50 * time.Millisecond) + start := time.Now() + Stop() + elapsed := time.Since(start) + // CPU profiling polls in 100 millisecond intervals and this can't be + // interrupted by pprof.StopCPUProfile, so we can't guarantee profiling + // will stop faster than 200 milliseconds (one interval to read + // remaining samples, second interval to detect profiling has stopped). + // Set a conservative upper bound on that stop time plus profile + // serialization. + if elapsed > 500*time.Millisecond { + t.Errorf("profiler took %v to stop", elapsed) + } + }) - stopped := make(chan struct{}, 1) - go func() { - p.stop() - stopped <- struct{}{} - }() + t.Run("stop-upload", func(t *testing.T) { + received := make(chan struct{}) + stop := make(chan struct{}) + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + select { + case received <- struct{}{}: + default: + } + <-stop + })) + defer server.Close() + defer close(stop) + + Start( + WithAgentAddr(server.Listener.Addr().String()), + WithPeriod(10*time.Millisecond), + CPUDuration(10*time.Millisecond), + WithUploadTimeout(time.Hour), + ) - // CPU profiling polls in 100 millisecond intervals and this can't be - // interrupted by pprof.StopCPUProfile, so we can't guarantee profiling - // will stop faster than that. - timeout := 200 * time.Millisecond - select { - case <-stopped: - case <-time.After(timeout): - // Capture stacks so we can see which goroutines are hanging and why. - stacks := make([]byte, 64*1024) - stacks = stacks[0:runtime.Stack(stacks, true)] - t.Fatalf("Stop() took longer than %s:\n%s", timeout, stacks) - } + <-received + start := time.Now() + Stop() + elapsed := time.Since(start) + if elapsed > 500*time.Millisecond { + t.Errorf("profiler took %v to stop", elapsed) + } + }) } func TestProfilerInternal(t *testing.T) {