Skip to content

Commit

Permalink
feat: classify incomplete requests as retriable
Browse files Browse the repository at this point in the history
feat(proxy_round_tripper): Use atomics instead of mutexes for traces
  • Loading branch information
domdom82 authored and geofffranks committed Mar 31, 2023
1 parent 8ca9d2f commit a7dbf84
Show file tree
Hide file tree
Showing 12 changed files with 345 additions and 132 deletions.
72 changes: 40 additions & 32 deletions proxy/fails/basic_classifiers.go
Original file line number Diff line number Diff line change
@@ -1,80 +1,88 @@
package fails

import (
"context"
"crypto/tls"
"crypto/x509"
"errors"
"net"

"context"
"strings"
)

var IdempotentRequestEOFError = errors.New("EOF (via idempotent request)")

var IncompleteRequestError = errors.New("incomplete request")

var AttemptedTLSWithNonTLSBackend = ClassifierFunc(func(err error) bool {
switch err.(type) {
case tls.RecordHeaderError, *tls.RecordHeaderError:
return true
default:
return false
}
return errors.As(err, &tls.RecordHeaderError{})
})

var Dial = ClassifierFunc(func(err error) bool {
ne, ok := err.(*net.OpError)
return ok && ne.Op == "dial"
var opErr *net.OpError
if errors.As(err, &opErr) {
return opErr.Op == "dial"
}
return false
})

var ContextCancelled = ClassifierFunc(func(err error) bool {
return err == context.Canceled
return errors.Is(err, context.Canceled)
})

var ConnectionResetOnRead = ClassifierFunc(func(err error) bool {
ne, ok := err.(*net.OpError)
return ok && ne.Op == "read" && ne.Err.Error() == "read: connection reset by peer"
var opErr *net.OpError
if errors.As(err, &opErr) {
return opErr.Err.Error() == "read: connection reset by peer"
}
return false
})

var RemoteFailedCertCheck = ClassifierFunc(func(err error) bool {
return err != nil && (err.Error() == "readLoopPeekFailLocked: remote error: tls: bad certificate" || err.Error() == "remote error: tls: bad certificate")
var opErr *net.OpError
if errors.As(err, &opErr) {
return opErr.Op == "remote error" && opErr.Err.Error() == "tls: bad certificate"
}
return false
})

var RemoteHandshakeTimeout = ClassifierFunc(func(err error) bool {
return err != nil && err.Error() == "net/http: TLS handshake timeout"
return err != nil && strings.Contains(err.Error(), "net/http: TLS handshake timeout")
})

var ExpiredOrNotYetValidCertFailure = ClassifierFunc(func(err error) bool {
switch x509err := err.(type) {
case x509.CertificateInvalidError:
return x509err.Reason == x509.Expired
case *x509.CertificateInvalidError:
return x509err.Reason == x509.Expired
default:
return false
var certErr x509.CertificateInvalidError
if errors.As(err, &certErr) {
return certErr.Reason == x509.Expired
}
return false
})

var RemoteHandshakeFailure = ClassifierFunc(func(err error) bool {
return err != nil && err.Error() == "remote error: tls: handshake failure"
var opErr *net.OpError
if errors.As(err, &opErr) {
return opErr != nil && opErr.Error() == "remote error: tls: handshake failure"
}
return false
})

var HostnameMismatch = ClassifierFunc(func(err error) bool {
switch err.(type) {
case x509.HostnameError, *x509.HostnameError:
return true
default:
return false
}
return errors.As(err, &x509.HostnameError{})
})

var UntrustedCert = ClassifierFunc(func(err error) bool {
switch err.(type) {
case x509.UnknownAuthorityError, *x509.UnknownAuthorityError:
var tlsCertError *tls.CertificateVerificationError
switch {
case errors.As(err, &x509.UnknownAuthorityError{}), errors.As(err, &tlsCertError):
return true
default:
return false
}
})

var IdempotentRequestEOF = ClassifierFunc(func(err error) bool {
return err == IdempotentRequestEOFError
return errors.Is(err, IdempotentRequestEOFError)
})

var IncompleteRequest = ClassifierFunc(func(err error) bool {
return errors.Is(err, IncompleteRequestError)
})
7 changes: 5 additions & 2 deletions proxy/fails/basic_classifiers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ var _ = Describe("ErrorClassifiers - enemy tests", func() {
Describe("ExpiredOrNotYetValidCertFailure", func() {
Context("when the cert is expired or not yet valid", func() {
var (
expiredClientCert *x509.Certificate
expiredClientCert *x509.Certificate
expiredClientCACert *x509.CertPool
)

BeforeEach(func() {
Expand All @@ -243,10 +244,12 @@ var _ = Describe("ErrorClassifiers - enemy tests", func() {
var err error
expiredClientCert, err = x509.ParseCertificate(block.Bytes)
Expect(err).NotTo(HaveOccurred())
expiredClientCACert = x509.NewCertPool()
expiredClientCACert.AddCert(expiredClientCertPool.CACert)
})

It("matches", func() {
_, err := expiredClientCert.Verify(x509.VerifyOptions{})
_, err := expiredClientCert.Verify(x509.VerifyOptions{Roots: expiredClientCACert})
Expect(fails.ExpiredOrNotYetValidCertFailure(err)).To(BeTrue())
})
})
Expand Down
1 change: 1 addition & 0 deletions proxy/fails/classifier_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ var RetriableClassifiers = ClassifierGroup{
UntrustedCert,
ExpiredOrNotYetValidCertFailure,
IdempotentRequestEOF,
IncompleteRequest,
}

var FailableClassifiers = ClassifierGroup{
Expand Down
21 changes: 16 additions & 5 deletions proxy/fails/classifier_group_test.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
package fails_test

import (
"errors"

"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"net"

"crypto/tls"

"code.cloudfoundry.org/gorouter/proxy/fails"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"

"code.cloudfoundry.org/gorouter/proxy/fails"
)

var _ = Describe("ClassifierGroup", func() {
Expand All @@ -34,15 +34,25 @@ var _ = Describe("ClassifierGroup", func() {
rc := fails.RetriableClassifiers

Expect(rc.Classify(&net.OpError{Op: "dial"})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "dial"}))).To(BeTrue())
Expect(rc.Classify(&net.OpError{Op: "remote error", Err: errors.New("tls: bad certificate")})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "remote error", Err: errors.New("tls: bad certificate")}))).To(BeTrue())
Expect(rc.Classify(&net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}))).To(BeTrue())
Expect(rc.Classify(errors.New("net/http: TLS handshake timeout"))).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, errors.New("net/http: TLS handshake timeout")))).To(BeTrue())
Expect(rc.Classify(tls.RecordHeaderError{})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, tls.RecordHeaderError{}))).To(BeTrue())
Expect(rc.Classify(x509.HostnameError{})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.HostnameError{}))).To(BeTrue())
Expect(rc.Classify(x509.UnknownAuthorityError{})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.UnknownAuthorityError{}))).To(BeTrue())
Expect(rc.Classify(x509.CertificateInvalidError{Reason: x509.Expired})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.CertificateInvalidError{Reason: x509.Expired}))).To(BeTrue())
Expect(rc.Classify(errors.New("i'm a potato"))).To(BeFalse())
Expect(rc.Classify(fails.IdempotentRequestEOFError)).To(BeTrue())
Expect(rc.Classify(fails.IncompleteRequestError)).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.HostnameError{}))).To(BeTrue())
})
})

Expand All @@ -61,6 +71,7 @@ var _ = Describe("ClassifierGroup", func() {
Expect(pc.Classify(x509.CertificateInvalidError{Reason: x509.Expired})).To(BeTrue())
Expect(pc.Classify(errors.New("i'm a potato"))).To(BeFalse())
Expect(pc.Classify(fails.IdempotentRequestEOFError)).To(BeTrue())
Expect(pc.Classify(fails.IncompleteRequestError)).To(BeTrue())
})
})
})
11 changes: 6 additions & 5 deletions proxy/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ import (

"code.cloudfoundry.org/gorouter/common/health"

"github.com/cloudfoundry/dropsonde"
"github.com/uber-go/zap"
"github.com/urfave/negroni"

"code.cloudfoundry.org/gorouter/accesslog"
router_http "code.cloudfoundry.org/gorouter/common/http"
"code.cloudfoundry.org/gorouter/config"
Expand All @@ -25,9 +29,6 @@ import (
"code.cloudfoundry.org/gorouter/registry"
"code.cloudfoundry.org/gorouter/route"
"code.cloudfoundry.org/gorouter/routeservice"
"github.com/cloudfoundry/dropsonde"
"github.com/uber-go/zap"
"github.com/urfave/negroni"
)

var (
Expand Down Expand Up @@ -110,7 +111,7 @@ func NewProxy(

roundTripperFactory := &round_tripper.FactoryImpl{
BackendTemplate: &http.Transport{
Dial: dialer.Dial,
DialContext: dialer.DialContext,
DisableKeepAlives: cfg.DisableKeepAlives,
MaxIdleConns: cfg.MaxIdleConns,
IdleConnTimeout: 90 * time.Second, // setting the value to golang default transport
Expand All @@ -120,7 +121,7 @@ func NewProxy(
TLSHandshakeTimeout: cfg.TLSHandshakeTimeout,
},
RouteServiceTemplate: &http.Transport{
Dial: dialer.Dial,
DialContext: dialer.DialContext,
DisableKeepAlives: cfg.DisableKeepAlives,
MaxIdleConns: cfg.MaxIdleConns,
IdleConnTimeout: 90 * time.Second, // setting the value to golang default transport
Expand Down
5 changes: 3 additions & 2 deletions proxy/round_tripper/dropsonde_round_tripper.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ package round_tripper
import (
"net/http"

"code.cloudfoundry.org/gorouter/proxy/utils"
"github.com/cloudfoundry/dropsonde"

"code.cloudfoundry.org/gorouter/proxy/utils"
)

func NewDropsondeRoundTripper(p ProxyRoundTripper) ProxyRoundTripper {
Expand Down Expand Up @@ -44,7 +45,7 @@ func (t *FactoryImpl) New(expectedServerName string, isRouteService bool, isHttp
customTLSConfig := utils.TLSConfigWithServerName(expectedServerName, template.TLSClientConfig, isRouteService)

newTransport := &http.Transport{
Dial: template.Dial,
DialContext: template.DialContext,
DisableKeepAlives: template.DisableKeepAlives,
MaxIdleConns: template.MaxIdleConns,
IdleConnTimeout: template.IdleConnTimeout,
Expand Down
65 changes: 65 additions & 0 deletions proxy/round_tripper/error_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"net"
"net/http/httptest"

Expand Down Expand Up @@ -136,6 +137,22 @@ var _ = Describe("HandleError", func() {
})
})

Context("HostnameMismatch wrapped in IncompleteRequestError", func() {
BeforeEach(func() {
wrappedErr := x509.HostnameError{Host: "the wrong one"}
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
errorHandler.HandleError(responseWriter, err)
})

It("has a 503 Status Code", func() {
Expect(responseWriter.Status()).To(Equal(503))
})

It("emits a backend_invalid_id metric", func() {
Expect(metricReporter.CaptureBackendInvalidIDCallCount()).To(Equal(1))
})
})

Context("Untrusted Cert", func() {
BeforeEach(func() {
err = x509.UnknownAuthorityError{}
Expand All @@ -151,6 +168,22 @@ var _ = Describe("HandleError", func() {
})
})

Context("Untrusted Cert wrapped in IncompleteRequestError", func() {
BeforeEach(func() {
wrappedErr := x509.UnknownAuthorityError{}
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
errorHandler.HandleError(responseWriter, err)
})

It("has a 526 Status Code", func() {
Expect(responseWriter.Status()).To(Equal(526))
})

It("emits a backend_invalid_tls_cert metric", func() {
Expect(metricReporter.CaptureBackendInvalidTLSCertCallCount()).To(Equal(1))
})
})

Context("Attempted TLS with non-TLS backend error", func() {
BeforeEach(func() {
err = tls.RecordHeaderError{Msg: "bad handshake"}
Expand All @@ -166,6 +199,22 @@ var _ = Describe("HandleError", func() {
})
})

Context("Attempted TLS with non-TLS backend error wrapped in IncompleteRequestError", func() {
BeforeEach(func() {
wrappedErr := tls.RecordHeaderError{Msg: "bad handshake"}
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
errorHandler.HandleError(responseWriter, err)
})

It("has a 525 Status Code", func() {
Expect(responseWriter.Status()).To(Equal(525))
})

It("emits a backend_tls_handshake_failed metric", func() {
Expect(metricReporter.CaptureBackendTLSHandshakeFailedCallCount()).To(Equal(1))
})
})

Context("Remote handshake failure", func() {
BeforeEach(func() {
err = &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}
Expand All @@ -181,6 +230,22 @@ var _ = Describe("HandleError", func() {
})
})

Context("Remote handshake failure wrapped in IncompleteRequestError", func() {
BeforeEach(func() {
wrappedErr := &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
errorHandler.HandleError(responseWriter, err)
})

It("has a 525 Status Code", func() {
Expect(responseWriter.Status()).To(Equal(525))
})

It("emits a backend_tls_handshake_failed metric", func() {
Expect(metricReporter.CaptureBackendTLSHandshakeFailedCallCount()).To(Equal(1))
})
})

Context("Context Cancelled Error", func() {
BeforeEach(func() {
err = context.Canceled
Expand Down
Loading

0 comments on commit a7dbf84

Please sign in to comment.