From c823090d53b75dfc5c962b4913bf2a7315424a89 Mon Sep 17 00:00:00 2001 From: Kyle Nusbaum Date: Wed, 2 Sep 2020 17:04:38 -0500 Subject: [PATCH] ddtrace/tracer: change the way the tracer determines agent connectivity Previously, the tracer would attempt to make an http connection to the transport's endpoint at startup.This patch improves the way that the tracer determines agent connectivity by sending an empty payload to the agent. Sending an empty payload is preferable because it exercises the actual transport client and does not cause error logs to be reported by the agent due to invalid (empty) payloads. Fixes #705 --- ddtrace/tracer/log.go | 19 ++++++++----------- ddtrace/tracer/log_test.go | 16 +++++++++------- ddtrace/tracer/option_test.go | 10 ++++++++++ ddtrace/tracer/tracer_test.go | 6 +++++- ddtrace/tracer/transport.go | 4 ++-- 5 files changed, 34 insertions(+), 21 deletions(-) diff --git a/ddtrace/tracer/log.go b/ddtrace/tracer/log.go index 7e494319f0..106ca51510 100644 --- a/ddtrace/tracer/log.go +++ b/ddtrace/tracer/log.go @@ -9,7 +9,6 @@ import ( "encoding/json" "fmt" "math" - "net/http" "runtime" "time" @@ -47,18 +46,16 @@ type startupInfo struct { GlobalService string `json:"global_service"` // Global service string. If not-nil should be same as Service. (#614) } -// checkEndpoint tries to connect to the URL specified by endpoint. -// If the endpoint is not reachable, checkEndpoint returns an error -// explaining why. -func checkEndpoint(endpoint string) error { - req, err := http.NewRequest("POST", endpoint, nil) - if err != nil { - return fmt.Errorf("cannot create http request: %v", err) - } - _, err = defaultClient.Do(req) +// checkEndpoint tries to send an empty payload through the transport. +// If the transport fails, it returns the cause as an error. +func checkEndpoint(t transport) error { + p := newPayload() + p.updateHeader() + b, err := t.send(p) if err != nil { return err } + b.Close() return nil } @@ -94,7 +91,7 @@ func logStartup(t *tracer) { if _, err := samplingRulesFromEnv(); err != nil { info.SamplingRulesError = fmt.Sprintf("%s", err) } - if err := checkEndpoint(t.transport.endpoint()); err != nil { + if err := checkEndpoint(t.transport); err != nil { info.AgentError = fmt.Sprintf("%s", err) log.Warn("DIAGNOSTICS Unable to reach agent: %s", err) } diff --git a/ddtrace/tracer/log_test.go b/ddtrace/tracer/log_test.go index 61d03c9873..58458a1a50 100644 --- a/ddtrace/tracer/log_test.go +++ b/ddtrace/tracer/log_test.go @@ -6,6 +6,7 @@ package tracer import ( + "errors" "math" "os" "testing" @@ -24,8 +25,8 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) - assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":null,"sampling_rules_error":"","tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"dd_version":"","architecture":"[^"]*","global_service":""}`, tp.Lines()[1]) + assert.Len(tp.Lines(), 1) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":null,"sampling_rules_error":"","tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"dd_version":"","architecture":"[^"]*","global_service":""}`, tp.Lines()[0]) }) t.Run("configured", func(t *testing.T) { @@ -45,6 +46,7 @@ func TestStartupLog(t *testing.T) { WithServiceVersion("2.3.4"), WithSamplingRules([]SamplingRule{ServiceRule("mysql", 0.75)}), WithDebugMode(true), + withTransportSendError(errors.New("Test Send Failure")), ) defer globalconfig.SetAnalyticsRate(math.NaN()) defer globalconfig.SetServiceName("") @@ -53,7 +55,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75}\],"sampling_rules_error":"","tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service"}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Test Send Failure","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75}\],"sampling_rules_error":"","tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service"}`, tp.Lines()[1]) }) t.Run("errors", func(t *testing.T) { @@ -61,13 +63,13 @@ func TestStartupLog(t *testing.T) { tp := new(testLogger) os.Setenv("DD_TRACE_SAMPLING_RULES", `[{"service": "some.service", "sample_rate": 0.234}, {"service": "other.service"}]`) defer os.Unsetenv("DD_TRACE_SAMPLING_RULES") - tracer, _, _, stop := startTestTracer(t, WithLogger(tp)) + tracer, _, _, stop := startTestTracer(t, WithLogger(tp), withTransportSendError(errors.New("Test Send Failure"))) defer stop() tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"dd_version":"","architecture":"[^"]*","global_service":""}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Test Send Failure","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"dd_version":"","architecture":"[^"]*","global_service":""}`, tp.Lines()[1]) }) } @@ -87,10 +89,10 @@ func TestLogSamplingRules(t *testing.T) { func TestLogAgentReachable(t *testing.T) { assert := assert.New(t) tp := new(testLogger) - tracer, _, _, stop := startTestTracer(t, WithLogger(tp)) + tracer, _, _, stop := startTestTracer(t, WithLogger(tp), withTransportSendError(errors.New("Test Send Failure"))) defer stop() tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ WARN: DIAGNOSTICS Unable to reach agent: Post`, tp.Lines()[0]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ WARN: DIAGNOSTICS Unable to reach agent: Test Send Failure`, tp.Lines()[0]) } diff --git a/ddtrace/tracer/option_test.go b/ddtrace/tracer/option_test.go index 8f5d811f17..c0da0cc207 100644 --- a/ddtrace/tracer/option_test.go +++ b/ddtrace/tracer/option_test.go @@ -17,6 +17,16 @@ import ( "github.com/stretchr/testify/assert" ) +// withTransportSendError will cause err to be returned on any call to (transport).send() +// if the tracer is configured with a dummyTransport, +func withTransportSendError(err error) StartOption { + return func(c *config) { + if t, ok := c.transport.(*dummyTransport); ok { + t.sendError = err + } + } +} + func withTransport(t transport) StartOption { return func(c *config) { c.transport = t diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index 78faa8903c..18248b1936 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -1150,7 +1150,8 @@ func startTestTracer(t interface { // Mock Transport with a real Encoder type dummyTransport struct { sync.RWMutex - traces spanLists + traces spanLists + sendError error } func newDummyTransport() *dummyTransport { @@ -1164,6 +1165,9 @@ func (t *dummyTransport) Len() int { } func (t *dummyTransport) send(p *payload) (io.ReadCloser, error) { + if t.sendError != nil { + return nil, t.sendError + } traces, err := decode(p) if err != nil { return nil, err diff --git a/ddtrace/tracer/transport.go b/ddtrace/tracer/transport.go index 7fa506e468..ce459e6edf 100644 --- a/ddtrace/tracer/transport.go +++ b/ddtrace/tracer/transport.go @@ -58,9 +58,9 @@ type transport interface { // newTransport returns a new Transport implementation that sends traces to a // trace agent running on the given hostname and port, using a given -// http.RoundTripper. If the zero values for hostname and port are provided, +// *http.Client. If the zero values for hostname and port are provided, // the default values will be used ("localhost" for hostname, and "8126" for -// port). If roundTripper is nil, a default is used. +// port). If client is nil, a default is used. // // In general, using this method is only necessary if you have a trace agent // running on a non-default port, if it's located on another machine, or when