fix: Memory leak introduced by correlation between metrics & traces fixed #449

Merged
merged 2 commits on Jan 18, 2023
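The leak came from using per-request tracing identifiers as Prometheus label values: the request histogram carried `parent_id`, `trace_id` and `span_id` labels, and since nearly every request has a unique trace/span ID, every observation created a new label combination. The client_golang registry keeps one child time series per label combination for the lifetime of the process, so the series set grew without bound under load. This PR drops those labels, leaving the bounded `status_code`, `method` and `path` set. The following standalone sketch is not taken from heimdall; the metric and label names only mirror the diff below, and it exists solely to illustrate the effect of the old label layout:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	reg := prometheus.NewRegistry()

	// Histogram labelled the way the middleware did before this fix:
	// bounded labels plus a per-request trace_id.
	hist := prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "request_duration_seconds",
		Buckets: prometheus.DefBuckets,
	}, []string{"status_code", "method", "path", "trace_id"})
	reg.MustRegister(hist)

	// Every "request" carries a unique trace_id, so every observation
	// creates a new child series that the registry retains forever.
	for i := 0; i < 100_000; i++ {
		traceID := fmt.Sprintf("%032x", i) // stand-in for a real trace ID
		hist.WithLabelValues("200", "GET", "/test", traceID).Observe(0.01)
	}

	mfs, _ := reg.Gather()
	fmt.Println("series held in memory:", len(mfs[0].Metric)) // ~100000, and still growing
}

With the labels reduced to `status_code`, `method` and `path`, the same loop produces a single series, so memory stays flat regardless of traffic.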
2 changes: 1 addition & 1 deletion docs/content/docs/operations/observability.adoc
@@ -235,7 +235,7 @@ By default, heimdall exposes a `/metrics` HTTP endpoint on port `10250` (See also
* Process information, like CPU, memory, file descriptor usage and start time
* Go runtime information, including details about GC, number of goroutines and OS threads
* Information about the metrics endpoint itself, including the number of internal errors encountered while gathering the metrics, number of current inflight and overall scrapes done.
- * Information about the decision and proxy requests handled, including the total amount and duration of http requests by status code, method and path, as well as information about requests in progress. If tracing information is available, labels about `parent_id`, `trace_id` and `span_id` are set for histogram metrics.
+ * Information about the decision and proxy requests handled, including the total amount and duration of http requests by status code, method and path, as well as information about requests in progress.
* Information about expiry for configured certificates.

The following table provides detailed information about these
27 changes: 3 additions & 24 deletions internal/fiber/middleware/prometheus/handler.go
@@ -24,8 +24,6 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
trace2 "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/trace"
)

type metricsHandler struct {
@@ -64,7 +62,7 @@ func New(opts ...Option) fiber.Handler {
1.0, 2.0, 5.0, 10.0, 15.0, // 1, 2, 5, 10, 20s
},
},
[]string{"status_code", "method", "path", "parent_id", "trace_id", "span_id"},
[]string{"status_code", "method", "path"},
)

gauge := promauto.With(options.registerer).NewGaugeVec(prometheus.GaugeOpts{
@@ -84,10 +82,7 @@
}

func (h *metricsHandler) observeRequest(ctx *fiber.Ctx) error {
- const (
- MagicNumber = 1e9
- NoValue = "none"
- )
+ const MagicNumber = 1e9

start := time.Now()

@@ -121,24 +116,8 @@ func (h *metricsHandler) observeRequest(ctx *fiber.Ctx) error {
statusCode := strconv.Itoa(status)
h.reqCounter.WithLabelValues(statusCode, method, path).Inc()

- span := trace.SpanFromContext(ctx.UserContext())
- spanCtx := span.SpanContext()
-
- traceID := NoValue
- spanID := NoValue
- parentID := NoValue
-
- if spanCtx.IsValid() {
- if roSpan, ok := span.(trace2.ReadOnlySpan); ok && roSpan.Parent().IsValid() {
- parentID = roSpan.Parent().SpanID().String()
- }
-
- traceID = spanCtx.TraceID().String()
- spanID = spanCtx.SpanID().String()
- }
-
elapsed := float64(time.Since(start).Nanoseconds()) / MagicNumber
- h.reqHistogram.WithLabelValues(statusCode, method, path, parentID, traceID, spanID).Observe(elapsed)
+ h.reqHistogram.WithLabelValues(statusCode, method, path).Observe(elapsed)

return err
}
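If correlating metrics with traces is still wanted, a common alternative (not part of this PR, sketched here only as a hypothetical variation of `observeRequest`) is to attach the trace ID as a Prometheus exemplar instead of a label. Exemplars are stored per histogram bucket rather than per label combination, so cardinality stays bounded; they are only exposed when the scraper negotiates the OpenMetrics format, and this sketch would re-introduce the `go.opentelemetry.io/otel/trace` import the PR removes. It assumes client_golang >= v1.4 and a valid span in the request context:

// Hypothetical variation, keeping the bounded label set from this PR.
obs := h.reqHistogram.WithLabelValues(statusCode, method, path)

spanCtx := trace.SpanFromContext(ctx.UserContext()).SpanContext()
if eo, ok := obs.(prometheus.ExemplarObserver); ok && spanCtx.IsValid() {
	// Attach the trace ID to the bucket as an exemplar instead of a label.
	eo.ObserveWithExemplar(elapsed, prometheus.Labels{"trace_id": spanCtx.TraceID().String()})
} else {
	obs.Observe(elapsed)
}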
28 changes: 0 additions & 28 deletions internal/fiber/middleware/prometheus/handler_test.go
@@ -17,7 +17,6 @@
package prometheus

import (
"context"
"net/http"
"net/http/httptest"
"testing"
@@ -27,12 +26,6 @@ import (
dto "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/propagation"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/trace"

"github.com/dadrus/heimdall/internal/fiber/middleware/opentelemetry"
)

func metricForType(metrics []*dto.MetricFamily, metricType *dto.MetricType) *dto.MetricFamily {
@@ -58,13 +51,6 @@ func getLabel(labels []*dto.LabelPair, name string) string {
func TestHandlerObserveRequests(t *testing.T) { //nolint:maintidx
t.Parallel()

- otel.SetTracerProvider(sdktrace.NewTracerProvider())
- otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}))
-
- parentCtx := trace.NewSpanContext(trace.SpanContextConfig{
- TraceID: trace.TraceID{1}, SpanID: trace.SpanID{2}, TraceFlags: trace.FlagsSampled,
- })
-
for _, tc := range []struct {
uc string
req *http.Request
@@ -97,9 +83,6 @@ func TestHandlerObserveRequests(t *testing.T) { //nolint:maintidx
assert.Equal(t, "/test", getLabel(histMetric.Metric[0].Label, "path"))
assert.Equal(t, "foobar", getLabel(histMetric.Metric[0].Label, "service"))
assert.Equal(t, "200", getLabel(histMetric.Metric[0].Label, "status_code"))
assert.Equal(t, "0200000000000000", getLabel(histMetric.Metric[0].Label, "parent_id"))
assert.NotEqual(t, "none", getLabel(histMetric.Metric[0].Label, "span_id"))
assert.NotEqual(t, "none", getLabel(histMetric.Metric[0].Label, "trace_id"))
require.Len(t, histMetric.Metric[0].Histogram.Bucket, 22)

gaugeMetric := metricForType(metrics, dto.MetricType_GAUGE.Enum())
@@ -142,9 +125,6 @@ func TestHandlerObserveRequests(t *testing.T) { //nolint:maintidx
assert.Equal(t, "/test", getLabel(histMetric.Metric[0].Label, "path"))
assert.Equal(t, "foobar", getLabel(histMetric.Metric[0].Label, "service"))
assert.Equal(t, "500", getLabel(histMetric.Metric[0].Label, "status_code"))
assert.Equal(t, "0200000000000000", getLabel(histMetric.Metric[0].Label, "parent_id"))
assert.NotEqual(t, "none", getLabel(histMetric.Metric[0].Label, "span_id"))
assert.NotEqual(t, "none", getLabel(histMetric.Metric[0].Label, "trace_id"))
require.Len(t, histMetric.Metric[0].Histogram.Bucket, 22)

gaugeMetric := metricForType(metrics, dto.MetricType_GAUGE.Enum())
@@ -187,9 +167,6 @@ func TestHandlerObserveRequests(t *testing.T) { //nolint:maintidx
assert.Equal(t, "/error", getLabel(histMetric.Metric[0].Label, "path"))
assert.Equal(t, "foobar", getLabel(histMetric.Metric[0].Label, "service"))
assert.Equal(t, "410", getLabel(histMetric.Metric[0].Label, "status_code"))
assert.Equal(t, "0200000000000000", getLabel(histMetric.Metric[0].Label, "parent_id"))
assert.NotEqual(t, "none", getLabel(histMetric.Metric[0].Label, "span_id"))
assert.NotEqual(t, "none", getLabel(histMetric.Metric[0].Label, "trace_id"))
require.Len(t, histMetric.Metric[0].Histogram.Bucket, 22)

gaugeMetric := metricForType(metrics, dto.MetricType_GAUGE.Enum())
@@ -220,7 +197,6 @@ func TestHandlerObserveRequests(t *testing.T) { //nolint:maintidx
registry := prometheus.NewRegistry()

app := fiber.New()
- app.Use(opentelemetry.New())
app.Use(New(
WithRegisterer(registry),
WithNamespace("foo"),
@@ -237,10 +213,6 @@ func TestHandlerObserveRequests(t *testing.T) { //nolint:maintidx

defer app.Shutdown() // nolint: errcheck

- otel.GetTextMapPropagator().Inject(
- trace.ContextWithRemoteSpanContext(context.Background(), parentCtx),
- propagation.HeaderCarrier(tc.req.Header))
-
// WHEN
resp, err := app.Test(tc.req, -1)
require.NoError(t, err)