From aaa9a139340f423fe780050d156954291fc82a74 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sun, 28 Oct 2018 20:29:50 -0700 Subject: [PATCH 1/6] go.mod --- go.mod | 17 +++++++++++++++++ go.sum | 31 +++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 go.mod create mode 100644 go.sum diff --git a/go.mod b/go.mod new file mode 100644 index 000000000..87587e65c --- /dev/null +++ b/go.mod @@ -0,0 +1,17 @@ +module github.com/hashicorp/memberlist + +require ( + github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/hashicorp/go-immutable-radix v1.0.0 // indirect + github.com/hashicorp/go-msgpack v0.0.0-20150518234257-fa3f63826f7c + github.com/hashicorp/go-multierror v1.0.0 + github.com/hashicorp/go-sockaddr v0.0.0-20180320115054-6d291a969b86 + github.com/miekg/dns v1.0.14 + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 + github.com/stretchr/testify v1.2.2 + golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3 // indirect + golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519 // indirect + golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 000000000..d33fdb46a --- /dev/null +++ b/go.sum @@ -0,0 +1,31 @@ +github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da h1:8GUt8eRujhVEGZFFEjBj46YV4rDjvGrNxb0KMWYkL2I= +github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0= +github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-msgpack v0.0.0-20150518234257-fa3f63826f7c h1:BTAbnbegUIMB6xmQCwWE8yRzbA4XSpnZY5hvRJC188I= +github.com/hashicorp/go-msgpack v0.0.0-20150518234257-fa3f63826f7c/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-multierror v1.0.0 h1:iVjPR7a6H0tWELX5NxNe7bYopibicUzc7uPribsnS6o= +github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= +github.com/hashicorp/go-sockaddr v0.0.0-20180320115054-6d291a969b86 h1:7YOlAIO2YWnJZkQp7B5eFykaIY7C9JndqAFQyVV5BhM= +github.com/hashicorp/go-sockaddr v0.0.0-20180320115054-6d291a969b86/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= +github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo= +github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/miekg/dns v1.0.14 h1:9jZdLNd/P4+SfEJ0TNyxYpsK8N4GtfylBLqtbYN1sbA= +github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= +github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= +github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3 h1:KYQXGkl6vs02hK7pK4eIbw0NpNPedieTSTEiJ//bwGs= +golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519 h1:x6rhz8Y9CjbgQkccRGmELH6K+LJj7tOoh3XWeC1yaQM= +golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5 h1:x6r4Jo0KNzOOzYd8lbcRsqjuqEASK6ob3auvWYM4/8U= +golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= From ad2723c30308ec595e88839bac84a04d7e183a4c Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sun, 28 Oct 2018 20:38:01 -0700 Subject: [PATCH 2/6] Update .travis.yml for Go 1.11 modules --- .travis.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 98b74aad5..b4f8f1c47 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,5 +9,7 @@ branches: only: - master -install: - - make deps +install: true + +env: + - GO111MODULE=on # Enable Go modules in 1.11 From 5bbe6109b20f31953c249f9dbf6740971fd322c0 Mon Sep 17 00:00:00 2001 From: Chris Baker Date: Mon, 17 Dec 2018 13:53:38 +0000 Subject: [PATCH 3/6] added back-off to accept loop to avoid a tight loop --- net_transport.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/net_transport.go b/net_transport.go index e7b88b01f..87058c0dc 100644 --- a/net_transport.go +++ b/net_transport.go @@ -221,6 +221,11 @@ func (t *NetTransport) Shutdown() error { // and hands them off to the stream channel. func (t *NetTransport) tcpListen(tcpLn *net.TCPListener) { defer t.wg.Done() + + const baseDelay = 5 * time.Millisecond + const maxDelay = 1 * time.Second + + var loopDelay time.Duration for { conn, err := tcpLn.AcceptTCP() if err != nil { @@ -228,9 +233,22 @@ func (t *NetTransport) tcpListen(tcpLn *net.TCPListener) { break } + if loopDelay == 0 { + loopDelay = baseDelay + } else { + loopDelay *= 2 + } + + if loopDelay > maxDelay { + loopDelay = maxDelay + } + t.logger.Printf("[ERR] memberlist: Error accepting TCP connection: %v", err) + time.Sleep(loopDelay) continue } + // No error, reset loop delay + loopDelay = 0 t.streamCh <- conn } From 419737d7de2542cedf933fd61c0411d9bbb5e085 Mon Sep 17 00:00:00 2001 From: Chris Baker Date: Mon, 17 Dec 2018 17:26:59 +0000 Subject: [PATCH 4/6] added comments to the backoff delay constants in net_transport --- net_transport.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net_transport.go b/net_transport.go index 87058c0dc..4723127f5 100644 --- a/net_transport.go +++ b/net_transport.go @@ -222,7 +222,12 @@ func (t *NetTransport) Shutdown() error { func (t *NetTransport) tcpListen(tcpLn *net.TCPListener) { defer t.wg.Done() + // baseDelay is the initial delay after an AcceptTCP() error before attempting again const baseDelay = 5 * time.Millisecond + + // maxDelay is the maximum delay after an AcceptTCP() error before attempting again. + // In the case that tcpListen() is error-looping, it will delay the shutdown check. + // Therefore, changes to maxDelay may have an effect on the latency of shutdown. const maxDelay = 1 * time.Second var loopDelay time.Duration From 5022e6aaaf11495d6eba3e03de7e98f375b5c024 Mon Sep 17 00:00:00 2001 From: Chris Baker Date: Thu, 20 Dec 2018 22:30:45 +0000 Subject: [PATCH 5/6] first pass at a test for AcceptTCP error backoff --- transport_test.go | 51 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/transport_test.go b/transport_test.go index f1c82da3e..a97283654 100644 --- a/transport_test.go +++ b/transport_test.go @@ -1,6 +1,9 @@ package memberlist import ( + "log" + "net" + "sync/atomic" "testing" "time" @@ -133,3 +136,51 @@ func TestTransport_Send(t *testing.T) { // assert send ordering. Sort both slices to be tolerant of re-ordering. require.ElementsMatch(t, expected, received) } + +type testCountingWriter struct { + t *testing.T + numCalls *int32 +} + +func (tw testCountingWriter) Write(p []byte) (n int, err error) { + atomic.AddInt32(tw.numCalls, 1) + tw.t.Log("countingWriter", string(p)) + return len(p), nil +} + +func TestTransport_TcpListenBackoff(t *testing.T) { + + listener, _ := net.ListenTCP("tcp", nil) + listener.Close() + + var numCalls int32 + countingWriter := testCountingWriter{t, &numCalls} + countingLogger := log.New(countingWriter, "test", log.LstdFlags) + transport := NetTransport{ + streamCh: make(chan net.Conn), + logger: countingLogger, + } + transport.wg.Add(1) + + go transport.tcpListen(listener) + + time.Sleep(5 * time.Second) + atomic.StoreInt32(&transport.shutdown, 1) + + c := make(chan struct{}) + go func() { + defer close(c) + transport.wg.Wait() + }() + select { + case <- c: + case <-time.After(1 * time.Second): + t.Error("timed out waiting for transport waitgroup to be done after setting shutdown == 1") + } + + t.Log(countingWriter.numCalls) + require.Equal(t, len(transport.streamCh), 0) + require.True(t, numCalls > 10) + require.True(t, numCalls < 15) +} + From f4e244579e82e3a4f3b61388390763fe77c55465 Mon Sep 17 00:00:00 2001 From: Chris Baker Date: Fri, 21 Dec 2018 15:12:55 +0000 Subject: [PATCH 6/6] added comments and some additional flexibility to the tcpListen back-off tests --- transport_test.go | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/transport_test.go b/transport_test.go index a97283654..e9ef9d19b 100644 --- a/transport_test.go +++ b/transport_test.go @@ -3,6 +3,7 @@ package memberlist import ( "log" "net" + "strings" "sync/atomic" "testing" "time" @@ -144,14 +145,22 @@ type testCountingWriter struct { func (tw testCountingWriter) Write(p []byte) (n int, err error) { atomic.AddInt32(tw.numCalls, 1) - tw.t.Log("countingWriter", string(p)) + if !strings.Contains(string(p), "memberlist: Error accepting TCP connection") { + tw.t.Error("did not receive expected log message") + } + tw.t.Log("countingWriter:", string(p)) return len(p), nil } +// TestTransport_TcpListenBackoff tests that AcceptTCP() errors in NetTransport#tcpListen() +// do not result in a tight loop and spam the log. We verify this here by counting the number +// of entries logged in a given time period. func TestTransport_TcpListenBackoff(t *testing.T) { - listener, _ := net.ListenTCP("tcp", nil) - listener.Close() + // testTime is the amount of time we will allow NetTransport#tcpListen() to run + // This needs to be long enough that to verify that maxDelay is in force, + // but not so long as to be obnoxious when running the test suite. + const testTime = 4 * time.Second var numCalls int32 countingWriter := testCountingWriter{t, &numCalls} @@ -162,11 +171,17 @@ func TestTransport_TcpListenBackoff(t *testing.T) { } transport.wg.Add(1) + // create a listener that will cause AcceptTCP calls to fail + listener, _ := net.ListenTCP("tcp", nil) + listener.Close() go transport.tcpListen(listener) - time.Sleep(5 * time.Second) + // sleep (+yield) for testTime seconds before asking the accept loop to shut down + time.Sleep(testTime) atomic.StoreInt32(&transport.shutdown, 1) + // Verify that the wg was completed on exit (but without blocking this test) + // maxDelay == 1s, so we will give the routine 1.25s to loop around and shut down. c := make(chan struct{}) go func() { defer close(c) @@ -174,13 +189,22 @@ func TestTransport_TcpListenBackoff(t *testing.T) { }() select { case <- c: - case <-time.After(1 * time.Second): - t.Error("timed out waiting for transport waitgroup to be done after setting shutdown == 1") - } - - t.Log(countingWriter.numCalls) + case <-time.After(1250 * time.Millisecond): + t.Error("timed out waiting for transport waitgroup to be done after flagging shutdown") + } + + // In testTime==4s, we expect to loop approximately 12 times (and log approximately 11 errors), + // with the following delays (in ms): + // 0+5+10+20+40+80+160+320+640+1000+1000+1000 == 4275 ms + // Too few calls suggests that the minDelay is not in force; too many calls suggests that the + // maxDelay is not in force or that the back-off isn't working at all. + // We'll leave a little flex; the important thing here is the asymptotic behavior. + // If the minDelay or maxDelay in NetTransport#tcpListen() are modified, this test may fail + // and need to be adjusted. + require.True(t, numCalls > 8) + require.True(t, numCalls < 14) + + // no connections should have been accepted and sent to the channel require.Equal(t, len(transport.streamCh), 0) - require.True(t, numCalls > 10) - require.True(t, numCalls < 15) }