Skip to content

Commit

Permalink
server: Fix the issue about RaftCluster cannot be stopped (#1370) (#1375
Browse files Browse the repository at this point in the history
)

* server: fix the issue about RaftCluster cannot be stopped

Signed-off-by: nolouch <nolouch@gmail.com>
  • Loading branch information
nolouch authored and disksing committed Dec 12, 2018
1 parent 5672765 commit 692d0c2
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 4 deletions.
11 changes: 7 additions & 4 deletions server/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@ import (
log "github.com/sirupsen/logrus"
)

// backgroundJobInterval is the interval handed to runBackgroundJobs when the
// cluster starts. It is a variable rather than a constant so that the
// highFrequencyClusterJobs failpoint can shorten it in tests.
var backgroundJobInterval = time.Minute

// RaftCluster is used for cluster config management.
// Raft cluster key format:
Expand Down Expand Up @@ -117,6 +115,10 @@ func (c *RaftCluster) start() error {

c.wg.Add(2)
go c.runCoordinator()
// gofail: var highFrequencyClusterJobs bool
// if highFrequencyClusterJobs {
// backgroundJobInterval = 100 * time.Microsecond
// }
go c.runBackgroundJobs(backgroundJobInterval)

c.running = true
Expand All @@ -135,16 +137,17 @@ func (c *RaftCluster) runCoordinator() {

// stop shuts the cluster down and waits for the goroutines tracked by c.wg to
// exit. It is a no-op when the cluster is not running.
//
// The lock is released explicitly on every path instead of with defer: a
// deferred Unlock combined with the explicit ones would unlock the mutex
// twice, and holding the lock across c.wg.Wait() would block any tracked
// goroutine that needs the lock before it can exit, so stop would never
// return.
func (c *RaftCluster) stop() {
	c.Lock()

	if !c.running {
		c.Unlock()
		return
	}

	c.running = false

	close(c.quit)
	c.coordinator.stop()
	// Release the lock before waiting so the goroutines being waited on can
	// still acquire it while they wind down.
	c.Unlock()
	c.wg.Wait()
}

Expand Down
31 changes: 31 additions & 0 deletions server/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ import (
"fmt"
"strings"
"sync"
"time"

"github.com/coreos/etcd/clientv3"
. "github.com/pingcap/check"
gofail "github.com/pingcap/gofail/runtime"
"github.com/pingcap/kvproto/pkg/metapb"
"github.com/pingcap/kvproto/pkg/pdpb"
"github.com/pingcap/pd/server/core"
Expand Down Expand Up @@ -426,6 +428,35 @@ func (s *testClusterSuite) TestRaftClusterRestart(c *C) {
cluster.stop()
}

// TestRaftClusterMultipleRestar makes sure PD does not deadlock when the Raft
// cluster is started and stopped again and again.
func (s *testClusterSuite) TestRaftClusterMultipleRestar(c *C) {
	const failpoint = "github.com/pingcap/pd/server/highFrequencyClusterJobs"

	var err error
	_, s.svr, s.cleanup, err = NewTestServer()
	// Check the error before registering cleanup: if the server could not be
	// created, s.cleanup may not be callable.
	c.Assert(err, IsNil)
	defer s.cleanup()
	mustWaitLeader(c, []*Server{s.svr})
	_, err = s.svr.bootstrapCluster(s.newBootstrapRequest(c, s.svr.clusterID, "127.0.0.1:0"))
	c.Assert(err, IsNil)
	// add an offline store
	store := s.newStore(c, s.allocID(c), "127.0.0.1:4")
	store.State = metapb.StoreState_Offline
	cluster := s.svr.GetRaftCluster()
	// Verify the cluster exists before calling a method on it.
	c.Assert(cluster, NotNil)
	err = cluster.putStore(store)
	c.Assert(err, IsNil)

	// Let the background job run at a small interval. The returned errors are
	// deliberately ignored: presumably the failpoint only exists in builds
	// preprocessed by gofail, and the test is still meaningful without it.
	gofail.Enable(failpoint, `return(true)`)
	// Turn the failpoint off again so it does not leak into later tests.
	defer gofail.Disable(failpoint)
	for i := 0; i < 100; i++ {
		err = s.svr.createRaftCluster()
		c.Assert(err, IsNil)
		time.Sleep(time.Millisecond)
		cluster = s.svr.GetRaftCluster()
		c.Assert(cluster, NotNil)
		cluster.stop()
	}
}

func (s *testClusterSuite) TestGetPDMembers(c *C) {

req := &pdpb.GetMembersRequest{
Expand Down

0 comments on commit 692d0c2

Please sign in to comment.