From 2847674f1720c8c554131b3c63068e49f9006e84 Mon Sep 17 00:00:00 2001 From: nolouch Date: Thu, 18 Oct 2018 20:12:23 +0800 Subject: [PATCH 1/7] server: use the same initcluster to restart joined member --- pkg/integration_test/join_test.go | 5 ++++ server/join.go | 39 +++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/pkg/integration_test/join_test.go b/pkg/integration_test/join_test.go index c66b45409c4..1c5ee1c2e0a 100644 --- a/pkg/integration_test/join_test.go +++ b/pkg/integration_test/join_test.go @@ -15,6 +15,7 @@ package integration import ( "context" + "os" "time" . "github.com/pingcap/check" @@ -44,6 +45,8 @@ func (s *integrationTestSuite) TestSimpleJoin(c *C) { c.Assert(err, IsNil) err = pd2.Run(context.TODO()) c.Assert(err, IsNil) + _, err = os.Stat(pd2.GetConfig().DataDir + "/join") + c.Assert(os.IsNotExist(err), IsFalse) members, err = etcdutil.ListEtcdMembers(client) c.Assert(err, IsNil) c.Assert(members.Members, HasLen, 2) @@ -57,6 +60,8 @@ func (s *integrationTestSuite) TestSimpleJoin(c *C) { c.Assert(err, IsNil) err = pd3.Run(context.TODO()) c.Assert(err, IsNil) + _, err = os.Stat(pd3.GetConfig().DataDir + "/join") + c.Assert(os.IsNotExist(err), IsFalse) members, err = etcdutil.ListEtcdMembers(client) c.Assert(err, IsNil) c.Assert(members.Members, HasLen, 3) diff --git a/server/join.go b/server/join.go index 70624bb2c2d..60849786c0a 100644 --- a/server/join.go +++ b/server/join.go @@ -15,6 +15,7 @@ package server import ( "fmt" + "io/ioutil" "os" "path" "strings" @@ -26,6 +27,15 @@ import ( log "github.com/sirupsen/logrus" ) +const ( + // privateFileMode grants owner to read/write a file. + privateFileMode = 0600 + // privateDirMode grants owner to make/remove files inside the directory. + privateDirMode = 0700 + + retryTimes = 100 +) + // PrepareJoinCluster sends MemberAdd command to PD cluster, // and returns the initial configuration of the PD cluster. // @@ -73,8 +83,20 @@ func PrepareJoinCluster(cfg *Config) error { return errors.New("join self is forbidden") } - // Cases with data directory. + filePath := cfg.DataDir + "/join" + // Read the persist join config + if _, err := os.Stat(filePath); !os.IsNotExist(err) { + s, err := ioutil.ReadFile(filePath) + if err != nil { + log.Fatal("read the join config meet error: ", err) + } + cfg.InitialCluster = strings.TrimSpace(string(s)) + cfg.InitialClusterState = embed.ClusterStateFlagExisting + return nil + } + initialCluster := "" + // Cases with data directory. if isDataExist(path.Join(cfg.DataDir, "member")) { cfg.InitialCluster = initialCluster cfg.InitialClusterState = embed.ClusterStateFlagExisting @@ -138,7 +160,20 @@ func PrepareJoinCluster(cfg *Config) error { initialCluster = strings.Join(pds, ",") cfg.InitialCluster = initialCluster cfg.InitialClusterState = embed.ClusterStateFlagExisting - return nil + err = os.Mkdir(cfg.DataDir, privateDirMode) + if err != nil && !os.IsExist(err) { + return err + } + + for i := 0; i < retryTimes; i++ { + err = ioutil.WriteFile(filePath, []byte(cfg.InitialCluster), privateFileMode) + if err != nil { + log.Errorf("persist join config failed: %s", err) + continue + } + break + } + return err } func isDataExist(d string) bool { From c0a0fbaddbd8d8a907e43b8940b340ff4d975436 Mon Sep 17 00:00:00 2001 From: nolouch Date: Fri, 19 Oct 2018 22:10:55 +0800 Subject: [PATCH 2/7] address comment --- server/join.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/join.go b/server/join.go index 60849786c0a..07acf2853c2 100644 --- a/server/join.go +++ b/server/join.go @@ -18,6 +18,7 @@ import ( "io/ioutil" "os" "path" + "path/filepath" "strings" "github.com/coreos/etcd/clientv3" @@ -83,7 +84,7 @@ func PrepareJoinCluster(cfg *Config) error { return errors.New("join self is forbidden") } - filePath := cfg.DataDir + "/join" + filePath := filepath.Join(cfg.DataDir, "join") // Read the persist join config if _, err := os.Stat(filePath); !os.IsNotExist(err) { s, err := ioutil.ReadFile(filePath) From 261337205c392bf0b13fe4c94ce91a95e1d249e5 Mon Sep 17 00:00:00 2001 From: nolouch Date: Tue, 23 Oct 2018 11:17:47 +0800 Subject: [PATCH 3/7] address comments --- server/join.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/join.go b/server/join.go index 07acf2853c2..ab8edbda476 100644 --- a/server/join.go +++ b/server/join.go @@ -161,7 +161,7 @@ func PrepareJoinCluster(cfg *Config) error { initialCluster = strings.Join(pds, ",") cfg.InitialCluster = initialCluster cfg.InitialClusterState = embed.ClusterStateFlagExisting - err = os.Mkdir(cfg.DataDir, privateDirMode) + err = os.MkdirAll(cfg.DataDir, privateDirMode) if err != nil && !os.IsExist(err) { return err } From c7ca4fb8623b5a3fe3c52f050c462a7579517824 Mon Sep 17 00:00:00 2001 From: nolouch Date: Tue, 23 Oct 2018 15:29:18 +0800 Subject: [PATCH 4/7] addreess comments --- pkg/integration_test/join_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/integration_test/join_test.go b/pkg/integration_test/join_test.go index 1c5ee1c2e0a..a8155c91793 100644 --- a/pkg/integration_test/join_test.go +++ b/pkg/integration_test/join_test.go @@ -16,6 +16,7 @@ package integration import ( "context" "os" + "path/filepath" "time" . "github.com/pingcap/check" @@ -45,7 +46,7 @@ func (s *integrationTestSuite) TestSimpleJoin(c *C) { c.Assert(err, IsNil) err = pd2.Run(context.TODO()) c.Assert(err, IsNil) - _, err = os.Stat(pd2.GetConfig().DataDir + "/join") + _, err = os.Stat(filepath.Join(pd2.GetConfig().DataDir, "join")) c.Assert(os.IsNotExist(err), IsFalse) members, err = etcdutil.ListEtcdMembers(client) c.Assert(err, IsNil) @@ -60,7 +61,7 @@ func (s *integrationTestSuite) TestSimpleJoin(c *C) { c.Assert(err, IsNil) err = pd3.Run(context.TODO()) c.Assert(err, IsNil) - _, err = os.Stat(pd3.GetConfig().DataDir + "/join") + _, err = os.Stat(filepath.Join(pd3.GetConfig().DataDir, "join")) c.Assert(os.IsNotExist(err), IsFalse) members, err = etcdutil.ListEtcdMembers(client) c.Assert(err, IsNil) From cb475a5f2635fcaf6a7fb93c3f37ff3b3de73993 Mon Sep 17 00:00:00 2001 From: nolouch Date: Tue, 23 Oct 2018 17:59:30 +0800 Subject: [PATCH 5/7] avoid concurrent join --- server/join.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/join.go b/server/join.go index ab8edbda476..42ea79e5a6f 100644 --- a/server/join.go +++ b/server/join.go @@ -126,6 +126,9 @@ func PrepareJoinCluster(cfg *Config) error { existed := false for _, m := range listResp.Members { + if len(m.Name) == 0 { + return errors.New("exsist a member that the join is not completed") + } if m.Name == cfg.Name { existed = true } @@ -154,6 +157,9 @@ func PrepareJoinCluster(cfg *Config) error { if memb.ID == addResp.Member.ID { n = cfg.Name } + if len(n) == 0 { + return errors.New("exsist a member that the join is not completed") + } for _, m := range memb.PeerURLs { pds = append(pds, fmt.Sprintf("%s=%s", n, m)) } From dc8c434a50de03988959b5bbb66c04329c43b78d Mon Sep 17 00:00:00 2001 From: nolouch Date: Tue, 23 Oct 2018 19:40:57 +0800 Subject: [PATCH 6/7] address comments --- server/join.go | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/server/join.go b/server/join.go index 42ea79e5a6f..0a675e9d424 100644 --- a/server/join.go +++ b/server/join.go @@ -33,8 +33,6 @@ const ( privateFileMode = 0600 // privateDirMode grants owner to make/remove files inside the directory. privateDirMode = 0700 - - retryTimes = 100 ) // PrepareJoinCluster sends MemberAdd command to PD cluster, @@ -127,7 +125,7 @@ func PrepareJoinCluster(cfg *Config) error { existed := false for _, m := range listResp.Members { if len(m.Name) == 0 { - return errors.New("exsist a member that the join is not completed") + return errors.New("there is a member that has not joined successfully") } if m.Name == cfg.Name { existed = true @@ -158,7 +156,7 @@ func PrepareJoinCluster(cfg *Config) error { n = cfg.Name } if len(n) == 0 { - return errors.New("exsist a member that the join is not completed") + return errors.New("there is a member that has not joined successfully") } for _, m := range memb.PeerURLs { pds = append(pds, fmt.Sprintf("%s=%s", n, m)) @@ -169,18 +167,11 @@ func PrepareJoinCluster(cfg *Config) error { cfg.InitialClusterState = embed.ClusterStateFlagExisting err = os.MkdirAll(cfg.DataDir, privateDirMode) if err != nil && !os.IsExist(err) { - return err + return errors.WithStack(err) } - for i := 0; i < retryTimes; i++ { - err = ioutil.WriteFile(filePath, []byte(cfg.InitialCluster), privateFileMode) - if err != nil { - log.Errorf("persist join config failed: %s", err) - continue - } - break - } - return err + err = ioutil.WriteFile(filePath, []byte(cfg.InitialCluster), privateFileMode) + return errors.WithStack(err) } func isDataExist(d string) bool { From d365d659e34f1c16118bac617fa6527f5e07a9bf Mon Sep 17 00:00:00 2001 From: nolouch Date: Wed, 24 Oct 2018 14:41:46 +0800 Subject: [PATCH 7/7] address comments --- pkg/integration_test/join_test.go | 6 +++--- server/join.go | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pkg/integration_test/join_test.go b/pkg/integration_test/join_test.go index a8155c91793..f140a1c0184 100644 --- a/pkg/integration_test/join_test.go +++ b/pkg/integration_test/join_test.go @@ -16,7 +16,7 @@ package integration import ( "context" "os" - "path/filepath" + "path" "time" . "github.com/pingcap/check" @@ -46,7 +46,7 @@ func (s *integrationTestSuite) TestSimpleJoin(c *C) { c.Assert(err, IsNil) err = pd2.Run(context.TODO()) c.Assert(err, IsNil) - _, err = os.Stat(filepath.Join(pd2.GetConfig().DataDir, "join")) + _, err = os.Stat(path.Join(pd2.GetConfig().DataDir, "join")) c.Assert(os.IsNotExist(err), IsFalse) members, err = etcdutil.ListEtcdMembers(client) c.Assert(err, IsNil) @@ -61,7 +61,7 @@ func (s *integrationTestSuite) TestSimpleJoin(c *C) { c.Assert(err, IsNil) err = pd3.Run(context.TODO()) c.Assert(err, IsNil) - _, err = os.Stat(filepath.Join(pd3.GetConfig().DataDir, "join")) + _, err = os.Stat(path.Join(pd3.GetConfig().DataDir, "join")) c.Assert(os.IsNotExist(err), IsFalse) members, err = etcdutil.ListEtcdMembers(client) c.Assert(err, IsNil) diff --git a/server/join.go b/server/join.go index 0a675e9d424..3de0d21c94d 100644 --- a/server/join.go +++ b/server/join.go @@ -18,7 +18,6 @@ import ( "io/ioutil" "os" "path" - "path/filepath" "strings" "github.com/coreos/etcd/clientv3" @@ -82,7 +81,7 @@ func PrepareJoinCluster(cfg *Config) error { return errors.New("join self is forbidden") } - filePath := filepath.Join(cfg.DataDir, "join") + filePath := path.Join(cfg.DataDir, "join") // Read the persist join config if _, err := os.Stat(filePath); !os.IsNotExist(err) { s, err := ioutil.ReadFile(filePath)