Skip to content

Commit

Permalink
ddtrace/tracer: change the way the tracer determines agent connectivity
Browse files Browse the repository at this point in the history
Previously, the tracer would attempt to make an http connection to the
transport's endpoint at startup.This patch improves the way that the tracer
determines agent connectivity by sending an empty payload to the agent.
Sending an empty payload is preferable because it exercises the actual
transport client and does not cause error logs to be reported by the agent
due to invalid (empty) payloads.

Fixes #705
  • Loading branch information
knusbaum committed Sep 2, 2020
1 parent 6d7c52c commit c823090
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 21 deletions.
19 changes: 8 additions & 11 deletions ddtrace/tracer/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"encoding/json"
"fmt"
"math"
"net/http"
"runtime"
"time"

Expand Down Expand Up @@ -47,18 +46,16 @@ type startupInfo struct {
GlobalService string `json:"global_service"` // Global service string. If not-nil should be same as Service. (#614)
}

// checkEndpoint tries to connect to the URL specified by endpoint.
// If the endpoint is not reachable, checkEndpoint returns an error
// explaining why.
func checkEndpoint(endpoint string) error {
req, err := http.NewRequest("POST", endpoint, nil)
if err != nil {
return fmt.Errorf("cannot create http request: %v", err)
}
_, err = defaultClient.Do(req)
// checkEndpoint tries to send an empty payload through the transport.
// If the transport fails, it returns the cause as an error.
func checkEndpoint(t transport) error {
p := newPayload()
p.updateHeader()
b, err := t.send(p)
if err != nil {
return err
}
b.Close()
return nil
}

Expand Down Expand Up @@ -94,7 +91,7 @@ func logStartup(t *tracer) {
if _, err := samplingRulesFromEnv(); err != nil {
info.SamplingRulesError = fmt.Sprintf("%s", err)
}
if err := checkEndpoint(t.transport.endpoint()); err != nil {
if err := checkEndpoint(t.transport); err != nil {
info.AgentError = fmt.Sprintf("%s", err)
log.Warn("DIAGNOSTICS Unable to reach agent: %s", err)
}
Expand Down
16 changes: 9 additions & 7 deletions ddtrace/tracer/log_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package tracer

import (
"errors"
"math"
"os"
"testing"
Expand All @@ -24,8 +25,8 @@ func TestStartupLog(t *testing.T) {

tp.Reset()
logStartup(tracer)
assert.Len(tp.Lines(), 2)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":null,"sampling_rules_error":"","tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"dd_version":"","architecture":"[^"]*","global_service":""}`, tp.Lines()[1])
assert.Len(tp.Lines(), 1)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":null,"sampling_rules_error":"","tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"dd_version":"","architecture":"[^"]*","global_service":""}`, tp.Lines()[0])
})

t.Run("configured", func(t *testing.T) {
Expand All @@ -45,6 +46,7 @@ func TestStartupLog(t *testing.T) {
WithServiceVersion("2.3.4"),
WithSamplingRules([]SamplingRule{ServiceRule("mysql", 0.75)}),
WithDebugMode(true),
withTransportSendError(errors.New("Test Send Failure")),
)
defer globalconfig.SetAnalyticsRate(math.NaN())
defer globalconfig.SetServiceName("")
Expand All @@ -53,21 +55,21 @@ func TestStartupLog(t *testing.T) {
tp.Reset()
logStartup(tracer)
assert.Len(tp.Lines(), 2)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75}\],"sampling_rules_error":"","tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service"}`, tp.Lines()[1])
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Test Send Failure","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75}\],"sampling_rules_error":"","tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service"}`, tp.Lines()[1])
})

t.Run("errors", func(t *testing.T) {
assert := assert.New(t)
tp := new(testLogger)
os.Setenv("DD_TRACE_SAMPLING_RULES", `[{"service": "some.service", "sample_rate": 0.234}, {"service": "other.service"}]`)
defer os.Unsetenv("DD_TRACE_SAMPLING_RULES")
tracer, _, _, stop := startTestTracer(t, WithLogger(tp))
tracer, _, _, stop := startTestTracer(t, WithLogger(tp), withTransportSendError(errors.New("Test Send Failure")))
defer stop()

tp.Reset()
logStartup(tracer)
assert.Len(tp.Lines(), 2)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"dd_version":"","architecture":"[^"]*","global_service":""}`, tp.Lines()[1])
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Test Send Failure","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"dd_version":"","architecture":"[^"]*","global_service":""}`, tp.Lines()[1])
})
}

Expand All @@ -87,10 +89,10 @@ func TestLogSamplingRules(t *testing.T) {
func TestLogAgentReachable(t *testing.T) {
assert := assert.New(t)
tp := new(testLogger)
tracer, _, _, stop := startTestTracer(t, WithLogger(tp))
tracer, _, _, stop := startTestTracer(t, WithLogger(tp), withTransportSendError(errors.New("Test Send Failure")))
defer stop()
tp.Reset()
logStartup(tracer)
assert.Len(tp.Lines(), 2)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ WARN: DIAGNOSTICS Unable to reach agent: Post`, tp.Lines()[0])
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ WARN: DIAGNOSTICS Unable to reach agent: Test Send Failure`, tp.Lines()[0])
}
10 changes: 10 additions & 0 deletions ddtrace/tracer/option_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ import (
"github.com/stretchr/testify/assert"
)

// withTransportSendError will cause err to be returned on any call to (transport).send()
// if the tracer is configured with a dummyTransport,
func withTransportSendError(err error) StartOption {
return func(c *config) {
if t, ok := c.transport.(*dummyTransport); ok {
t.sendError = err
}
}
}

func withTransport(t transport) StartOption {
return func(c *config) {
c.transport = t
Expand Down
6 changes: 5 additions & 1 deletion ddtrace/tracer/tracer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1150,7 +1150,8 @@ func startTestTracer(t interface {
// Mock Transport with a real Encoder
type dummyTransport struct {
sync.RWMutex
traces spanLists
traces spanLists
sendError error
}

func newDummyTransport() *dummyTransport {
Expand All @@ -1164,6 +1165,9 @@ func (t *dummyTransport) Len() int {
}

func (t *dummyTransport) send(p *payload) (io.ReadCloser, error) {
if t.sendError != nil {
return nil, t.sendError
}
traces, err := decode(p)
if err != nil {
return nil, err
Expand Down
4 changes: 2 additions & 2 deletions ddtrace/tracer/transport.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ type transport interface {

// newTransport returns a new Transport implementation that sends traces to a
// trace agent running on the given hostname and port, using a given
// http.RoundTripper. If the zero values for hostname and port are provided,
// *http.Client. If the zero values for hostname and port are provided,
// the default values will be used ("localhost" for hostname, and "8126" for
// port). If roundTripper is nil, a default is used.
// port). If client is nil, a default is used.
//
// In general, using this method is only necessary if you have a trace agent
// running on a non-default port, if it's located on another machine, or when
Expand Down

0 comments on commit c823090

Please sign in to comment.