Skip to content

Commit

Permalink
resetNodes should respect the deadNodeReclaimTime
Browse files Browse the repository at this point in the history
When deadNodeReclaimTime is 0, dead nodes should not be reclaimed per
the definition.

To avoid breaking changes, set the default deadNodeReclaimTime of LAN/WAN
configurations to 30 seconds.
  • Loading branch information
xliuxu committed Aug 19, 2024
1 parent 3f82dc1 commit 8899f92
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 53 deletions.
6 changes: 4 additions & 2 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,8 @@ type Config struct {
UDPBufferSize int

// DeadNodeReclaimTime controls the time before a dead node's name can be
// reclaimed by one with a different address or port. By default, this is 0,
// meaning nodes cannot be reclaimed this way.
// reclaimed by one with a different address or port. Setting DeadNodeReclaimTime
// to 0 means that dead nodes will never be reclaimed.
DeadNodeReclaimTime time.Duration

// RequireNodeNames controls if the name of a node is required when sending
Expand Down Expand Up @@ -319,6 +319,8 @@ func DefaultLANConfig() *Config {
DisableTcpPings: false, // TCP pings are safe, even with mixed versions
AwarenessMaxMultiplier: 8, // Probe interval backs off to 8 seconds

DeadNodeReclaimTime: 30 * time.Second, // Reclaim dead nodes after 30 seconds

GossipNodes: 3, // Gossip to 3 nodes
GossipInterval: 200 * time.Millisecond, // Gossip more rapidly
GossipToTheDeadTime: 30 * time.Second, // Same as push/pull
Expand Down
2 changes: 1 addition & 1 deletion state.go
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ func (m *Memberlist) resetNodes() {
defer m.nodeLock.Unlock()

// Move dead nodes, but respect gossip to the dead interval
deadIdx := moveDeadNodes(m.nodes, m.config.GossipToTheDeadTime)
deadIdx := moveDeadNodes(m.nodes, m.config.DeadNodeReclaimTime, m.config.GossipToTheDeadTime)

// Deregister the dead nodes
for i := deadIdx; i < len(m.nodes); i++ {
Expand Down
16 changes: 13 additions & 3 deletions util.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,16 +105,26 @@ func pushPullScale(interval time.Duration, n int) time.Duration {

// moveDeadNodes moves dead and left nodes that that have not changed during the gossipToTheDeadTime interval
// to the end of the slice and returns the index of the first moved node.
func moveDeadNodes(nodes []*nodeState, gossipToTheDeadTime time.Duration) int {
func moveDeadNodes(nodes []*nodeState, deadNodeReclaimTime, gossipToTheDeadTime time.Duration) int {
numDead := 0
n := len(nodes)
for i := 0; i < n-numDead; i++ {
if !nodes[i].DeadOrLeft() {
continue
}

// Respect the gossip to the dead interval
if time.Since(nodes[i].StateChange) <= gossipToTheDeadTime {
if nodes[i].State == StateDead && deadNodeReclaimTime == 0 {
// If deadNodeReclaimTime is 0, we don't reclaim dead nodes
continue
}

reclaimTime := deadNodeReclaimTime
if gossipToTheDeadTime > reclaimTime {
reclaimTime = gossipToTheDeadTime
}

// Respect the gossip to the dead interval and the dead node reclaim time
if time.Since(nodes[i].StateChange) <= reclaimTime {
continue
}

Expand Down
130 changes: 83 additions & 47 deletions util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -166,76 +167,111 @@ func TestPushPullScale(t *testing.T) {
}

func TestMoveDeadNodes(t *testing.T) {
nodes := []*nodeState{
&nodeState{
now := time.Now()
nodeStates := []*nodeState{
{
State: StateDead,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-20 * time.Second),
},
&nodeState{
{
State: StateAlive,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-20 * time.Second),
},
// This dead node should not be moved, as its state changed
// less than the specified GossipToTheDead time ago
&nodeState{
{
State: StateDead,
StateChange: time.Now().Add(-10 * time.Second),
StateChange: now.Add(-10 * time.Second),
},
// This left node should not be moved, as its state changed
// less than the specified GossipToTheDead time ago
&nodeState{
{
State: StateLeft,
StateChange: time.Now().Add(-10 * time.Second),
StateChange: now.Add(-10 * time.Second),
},
&nodeState{
{
State: StateLeft,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-20 * time.Second),
},
&nodeState{
{
State: StateAlive,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-20 * time.Second),
},
&nodeState{
{
State: StateDead,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-20 * time.Second),
},
&nodeState{
{
State: StateAlive,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-20 * time.Second),
},
&nodeState{
{
State: StateLeft,
StateChange: now.Add(-20 * time.Second),
},
{
State: StateLeft,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-30 * time.Second),
},
{
State: StateDead,
StateChange: now.Add(-30 * time.Second),
},
}

idx := moveDeadNodes(nodes, (15 * time.Second))
if idx != 5 {
t.Fatalf("bad index")
tests := []struct {
name string
deadNodeReclaimTime time.Duration
gossipToTheDeadTime time.Duration
expectedIndex int
}{
{
name: "Do not reclaim dead nodes when deadNodeReclaimTime is 0",
deadNodeReclaimTime: 0,
gossipToTheDeadTime: 15 * time.Second,
expectedIndex: 8,
},
{
name: "Reclaim dead nodes when deadNodeReclaimTime is greater than 0",
deadNodeReclaimTime: 10 * time.Second,
gossipToTheDeadTime: 15 * time.Second,
expectedIndex: 5,
},
{
name: "deadNodeReclaimTime is greater than gossipToTheDeadTime",
deadNodeReclaimTime: 25 * time.Second,
gossipToTheDeadTime: 15 * time.Second,
expectedIndex: 9,
},
}
for i := 0; i < idx; i++ {
switch i {
case 2:
// Recently dead node remains at index 2,
// since nodes are swapped out to move to end.
if nodes[i].State != StateDead {
t.Fatalf("Bad state %d", i)
}
case 3:
//Recently left node should remain at 3
if nodes[i].State != StateLeft {
t.Fatalf("Bad State %d", i)
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
states := make([]*nodeState, len(nodeStates))
copy(states, nodeStates)
index := moveDeadNodes(states, tt.deadNodeReclaimTime, tt.gossipToTheDeadTime)
assert.Equal(t, tt.expectedIndex, index)

reclaimTime := now.Add(tt.deadNodeReclaimTime)
if tt.gossipToTheDeadTime > tt.deadNodeReclaimTime {
reclaimTime = now.Add(tt.gossipToTheDeadTime)
}
default:
if nodes[i].State != StateAlive {
t.Fatalf("Bad state %d", i)

if tt.deadNodeReclaimTime == 0 {
for i := 0; i < index; i++ {
if states[i].State == StateLeft {
assert.True(t, states[i].StateChange.Before(reclaimTime), "node %d should have been moved", i)
}
}
for i := index; i < len(states); i++ {
assert.Equal(t, StateLeft, states[i].State)
}
} else {
for i := 0; i < index; i++ {
if states[i].DeadOrLeft() {
assert.True(t, states[i].StateChange.Before(reclaimTime), "node %d should have been moved", i)
}
}
for i := index; i < len(states); i++ {
assert.True(t, states[i].DeadOrLeft())
}
}
}
}
for i := idx; i < len(nodes); i++ {
if !nodes[i].DeadOrLeft() {
t.Fatalf("Bad state %d", i)
}
})
}
}

Expand Down

0 comments on commit 8899f92

Please sign in to comment.