Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

raft: fix deadlock during PreVote migration process #8525

Merged
merged 1 commit into from
Jan 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion raft/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ func (r *raft) Step(m pb.Message) error {
}

case m.Term < r.Term:
if r.checkQuorum && (m.Type == pb.MsgHeartbeat || m.Type == pb.MsgApp) {
if (r.checkQuorum || r.preVote) && (m.Type == pb.MsgHeartbeat || m.Type == pb.MsgApp) {
// We have received messages from a leader at a lower term. It is possible
// that these messages were simply delayed in the network, but this could
// also mean that this node has advanced its term number during a network
Expand All @@ -748,7 +748,15 @@ func (r *raft) Step(m pb.Message) error {
// removed node will send MsgVotes (or MsgPreVotes) which will be ignored,
// but it will not receive MsgApp or MsgHeartbeat, so it will not create
// disruptive term increases
// The above comments also true for Pre-Vote
r.send(pb.Message{To: m.From, Type: pb.MsgAppResp})
} else if m.Type == pb.MsgPreVote {
// Before Pre-Vote enable, there may have candidate with higher term,
// but less log. After update to Pre-Vote, the cluster may deadlock if
// we drop messages with a lower term.
r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] rejected %s from %x [logterm: %d, index: %d] at term %d",
r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.Type, m.From, m.LogTerm, m.Index, r.Term)
r.send(pb.Message{To: m.From, Term: r.Term, Type: pb.MsgPreVoteResp, Reject: true})
} else {
// ignore other cases
r.logger.Infof("%x [term: %d] ignored a %s message with lower term from %x [term: %d]",
Expand Down
144 changes: 144 additions & 0 deletions raft/raft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3290,6 +3290,150 @@ func TestPreVoteWithSplitVote(t *testing.T) {
}
}

// simulate rolling update a cluster for Pre-Vote. cluster has 3 nodes [n1, n2, n3].
// n1 is leader with term 2
// n2 is follower with term 2
// n3 is partitioned, with term 4 and less log, state is candidate
func newPreVoteMigrationCluster(t *testing.T) *network {
n1 := newTestRaft(1, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())
n2 := newTestRaft(2, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())
n3 := newTestRaft(3, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())

n1.becomeFollower(1, None)
n2.becomeFollower(1, None)
n3.becomeFollower(1, None)

n1.preVote = true
n2.preVote = true
// We intentionally do not enable PreVote for n3, this is done so in order
// to simulate a rolling restart process where it's possible to have a mixed
// version cluster with replicas with PreVote enabled, and replicas without.

nt := newNetwork(n1, n2, n3)
nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup})

// Cause a network partition to isolate n3.
nt.isolate(3)
nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}})
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})

// check state
// n1.state == StateLeader
// n2.state == StateFollower
// n3.state == StateCandidate
if n1.state != StateLeader {
t.Fatalf("node 1 state: %s, want %s", n1.state, StateLeader)
}
if n2.state != StateFollower {
t.Fatalf("node 2 state: %s, want %s", n2.state, StateFollower)
}
if n3.state != StateCandidate {
t.Fatalf("node 3 state: %s, want %s", n3.state, StateCandidate)
}

// check term
// n1.Term == 2
// n2.Term == 2
// n3.Term == 4
if n1.Term != 2 {
t.Fatalf("node 1 term: %d, want %d", n1.Term, 2)
}
if n2.Term != 2 {
t.Fatalf("node 2 term: %d, want %d", n2.Term, 2)
}
if n3.Term != 4 {
t.Fatalf("node 3 term: %d, want %d", n3.Term, 4)
}

// Enable prevote on n3, then recover the network
n3.preVote = true
nt.recover()

return nt
}

func TestPreVoteMigrationCanCompleteElection(t *testing.T) {
nt := newPreVoteMigrationCluster(t)

// n1 is leader with term 2
// n2 is follower with term 2
// n3 is pre-candidate with term 4, and less log
n2 := nt.peers[2].(*raft)
n3 := nt.peers[3].(*raft)

// simulate leader down
nt.isolate(1)

// Call for elections from both n2 and n3.
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup})

// check state
// n2.state == Follower
// n3.state == PreCandidate
if n2.state != StateFollower {
t.Errorf("node 2 state: %s, want %s", n2.state, StateFollower)
}
if n3.state != StatePreCandidate {
t.Errorf("node 3 state: %s, want %s", n3.state, StatePreCandidate)
}

nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup})

// Do we have a leader?
if n2.state != StateLeader && n3.state != StateFollower {
t.Errorf("no leader")
}
}

func TestPreVoteMigrationWithFreeStuckPreCandidate(t *testing.T) {
nt := newPreVoteMigrationCluster(t)

// n1 is leader with term 2
// n2 is follower with term 2
// n3 is pre-candidate with term 4, and less log
n1 := nt.peers[1].(*raft)
n2 := nt.peers[2].(*raft)
n3 := nt.peers[3].(*raft)

nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})

if n1.state != StateLeader {
t.Errorf("node 1 state: %s, want %s", n1.state, StateLeader)
}
if n2.state != StateFollower {
t.Errorf("node 2 state: %s, want %s", n2.state, StateFollower)
}
if n3.state != StatePreCandidate {
t.Errorf("node 3 state: %s, want %s", n3.state, StatePreCandidate)
}

// Pre-Vote again for safety
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})

if n1.state != StateLeader {
t.Errorf("node 1 state: %s, want %s", n1.state, StateLeader)
}
if n2.state != StateFollower {
t.Errorf("node 2 state: %s, want %s", n2.state, StateFollower)
}
if n3.state != StatePreCandidate {
t.Errorf("node 3 state: %s, want %s", n3.state, StatePreCandidate)
}

nt.send(pb.Message{From: 1, To: 3, Type: pb.MsgHeartbeat, Term: n1.Term})

// Disrupt the leader so that the stuck peer is freed
if n1.state != StateFollower {
t.Errorf("state = %s, want %s", n1.state, StateFollower)
}
if n3.Term != n1.Term {
t.Errorf("term = %d, want %d", n3.Term, n1.Term)
}
}

func entsWithConfig(configFunc func(*Config), terms ...uint64) *raft {
storage := NewMemoryStorage()
for i, term := range terms {
Expand Down