Skip to content

Commit

Permalink
etcdserver: adjust tick fast-forwarding on restart
Browse files Browse the repository at this point in the history
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
  • Loading branch information
gyuho committed Mar 1, 2018
1 parent fe9b909 commit 5405135
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 12 deletions.
12 changes: 0 additions & 12 deletions etcdserver/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,16 +370,6 @@ func (r *raftNode) resumeSending() {
p.Resume()
}

// advanceTicksForElection feeds electionTicks-1 ticks to n so that the first
// leader election happens sooner when the whole cluster is being bootstrapped.
// One tick is deliberately held back, leaving at least one heartbeat interval
// for a possibly existing leader to reach this node.
//
// When electionTicks is 1 or less, n.Tick is never called.
func advanceTicksForElection(n raft.Node, electionTicks int) {
	// Count down from the number of ticks to deliver (one short of the
	// full election timeout).
	for remaining := electionTicks - 1; remaining > 0; remaining-- {
		n.Tick()
	}
}

func startNode(cfg ServerConfig, cl *membership.RaftCluster, ids []types.ID) (id types.ID, n raft.Node, s *raft.MemoryStorage, w *wal.WAL) {
var err error
member := cl.MemberByName(cfg.Name)
Expand Down Expand Up @@ -417,7 +407,6 @@ func startNode(cfg ServerConfig, cl *membership.RaftCluster, ids []types.ID) (id
raftStatusMu.Lock()
raftStatus = n.Status
raftStatusMu.Unlock()
advanceTicksForElection(n, c.ElectionTick)
return id, n, s, w
}

Expand Down Expand Up @@ -451,7 +440,6 @@ func restartNode(cfg ServerConfig, snapshot *raftpb.Snapshot) (types.ID, *member
raftStatusMu.Lock()
raftStatus = n.Status
raftStatusMu.Unlock()
advanceTicksForElection(n, c.ElectionTick)
return id, cl, n, s, w
}

Expand Down
47 changes: 47 additions & 0 deletions etcdserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) {
snapshot *raftpb.Snapshot
)

restart := false
switch {
case !haveWAL && !cfg.NewCluster:
if err = cfg.VerifyJoinExisting(); err != nil {
Expand Down Expand Up @@ -390,6 +391,7 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) {
cfg.Print()
if !cfg.ForceNewCluster {
id, cl, n, s, w = restartNode(cfg, snapshot)
restart = true
} else {
id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
}
Expand Down Expand Up @@ -521,9 +523,54 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) {
}
srv.r.transport = tr

activePeers := 0
for _, m := range cl.Members() {
if m.ID != id {
if tr.IsActive(m.ID) {
activePeers++
}
}
}

clusterN := len(cl.Members())
plog.Infof("%s has %d active peers among %d found member(s)", srv.ID(), activePeers, clusterN)

// fast-forward almost a full election timeout (ElectionTicks - 1) only if
// 1. starting a fresh cluster (restart == false)
// AND
// 2-1. single-node cluster (no peer to wait for)
// OR
// 2-2. no active peer connection has been established
// (e.g. node that starts very first out of 3-node)
if !restart && (clusterN == 1 || activePeers == 0) {
// save one tick; leader may heartbeat before timeout
tick := cfg.ElectionTicks - 1
plog.Infof("%s is advancing %d ticks for faster election (election tick %d)", srv.ID(), tick, cfg.ElectionTicks)
advanceTicksForElection(n, tick)
} else {
// on restart, or when an active peer is already connected,
// there is likely an active leader; advance only a small
// number of ticks so that leader heartbeats can still arrive
// in time, thus preventing disruptive elections
tick := cfg.ElectionTicks / 10 // default election tick is 10 (1s)
plog.Infof("%s is advancing %d ticks (election tick %d)", srv.ID(), tick, cfg.ElectionTicks)
advanceTicksForElection(n, tick)
}
return srv, nil
}

// advanceTicksForElection calls n.Tick exactly ticks times in order to
// shorten the wait for the first leader election when the whole cluster is
// being bootstrapped (especially in cross data-center deployments).
//
// The caller chooses how many ticks to advance and should pass fewer than
// the full election timeout, leaving a few heartbeats for a possibly
// existing leader to contact this node. When ticks is zero or negative,
// n.Tick is never called.
func advanceTicksForElection(n raft.Node, ticks int) {
	for remaining := ticks; remaining > 0; remaining-- {
		n.Tick()
	}
}

// Start performs any initialization of the Server necessary for it to
// begin serving requests. It must be called before Do or Process.
// Start must be non-blocking; any long-running server functionality
Expand Down

0 comments on commit 5405135

Please sign in to comment.