Skip to content

Commit

Permalink
Add schema verification when closing etcd.
Browse files Browse the repository at this point in the history
Signed-off-by: Siyuan Zhang <sizhang@google.com>
  • Loading branch information
siyuanfoundation committed Feb 15, 2024
1 parent 24142c0 commit 4b197f6
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 0 deletions.
5 changes: 5 additions & 0 deletions embed/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"go.etcd.io/etcd/etcdserver/api/v2v3"
"go.etcd.io/etcd/etcdserver/api/v3client"
"go.etcd.io/etcd/etcdserver/api/v3rpc"
"go.etcd.io/etcd/etcdserver/verify"
"go.etcd.io/etcd/pkg/debugutil"
runtimeutil "go.etcd.io/etcd/pkg/runtime"
"go.etcd.io/etcd/pkg/transport"
Expand Down Expand Up @@ -376,6 +377,10 @@ func (e *Etcd) Close() {
defer func() {
if lg != nil {
lg.Info("closed etcd server", fields...)
verify.MustVerifyIfEnabled(verify.Config{
Logger: lg,
DataDir: e.cfg.Dir,
})
lg.Sync()
}
}()
Expand Down
20 changes: 20 additions & 0 deletions etcdserver/verify/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright 2021 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package verify

// Package verify analyzes the persistent state of etcd to find potential
// inconsistencies.
// In particular, it covers cross-checking between different aspects of etcd
// storage, such as the WAL and the backend.
114 changes: 114 additions & 0 deletions etcdserver/verify/verify.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright 2021 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package verify

import (
"fmt"
"os"
"path/filepath"

"go.etcd.io/etcd/mvcc"
"go.etcd.io/etcd/mvcc/backend"
"go.etcd.io/etcd/pkg/fileutil"
"go.etcd.io/etcd/version"
"go.uber.org/zap"
)

// Environment-variable based switches for the verification logic.
const (
	// ENV_VERIFY is the name of the environment variable consulted by
	// VerifyIfEnabled to decide whether verification should run.
	ENV_VERIFY = "ETCD_VERIFY"
	// ENV_VERIFY_ALL_VALUE is the value of ENV_VERIFY that turns
	// verification on.
	ENV_VERIFY_ALL_VALUE = "all"
)

// Config carries the inputs for a verification run.
type Config struct {
// DataDir is a root directory where the data being verified are stored.
// The backend file is looked up at <DataDir>/member/snap/db.
DataDir string

// Logger receives progress and failure messages. Verify substitutes a
// no-op logger when this is nil.
Logger *zap.Logger
}

// Verify runs consistency checks against the etcd data directory named in
// cfg.DataDir.
//
// Problems are normally reported through the returned error; a panic raised
// while checking is logged and then re-raised to the caller.
// When the backend db file does not exist, verification is skipped and nil
// is returned.
//
// The function is expected to work on a data directory that is not in use,
// i.e. no file locks should be held on it. Verify does not modify the data.
func Verify(cfg Config) error {
	logger := cfg.Logger
	if logger == nil {
		logger = zap.NewNop()
	}

	dbPath := toBackendFileName(cfg.DataDir)
	if !fileutil.Exist(dbPath) {
		logger.Info("verification skipped due to non exist db file")
		return nil
	}

	dataDirField := zap.String("data-dir", cfg.DataDir)

	// verifyErr is read by the deferred reporter below, so it has to be
	// assigned before returning.
	var verifyErr error
	logger.Info("verification of persisted state", dataDirField)
	defer func() {
		if verifyErr != nil {
			logger.Error("verification of persisted state failed",
				dataDirField,
				zap.Error(verifyErr))
			return
		}
		// recover must be called directly from this deferred function.
		if r := recover(); r != nil {
			logger.Error("verification of persisted state failed",
				dataDirField)
			panic(r)
		}
		logger.Info("verification of persisted state successful", dataDirField)
	}()

	backendCfg := backend.DefaultBackendConfig()
	backendCfg.Path = dbPath
	backendCfg.Logger = cfg.Logger

	be := backend.New(backendCfg)
	// Registered after the reporter, so the backend is closed before the
	// outcome is logged.
	defer be.Close()

	verifyErr = validateSchema(logger, be)
	return verifyErr
}

// VerifyIfEnabled runs Verify only when the ETCD_VERIFY environment variable
// (ENV_VERIFY) is set to ENV_VERIFY_ALL_VALUE; otherwise it does nothing and
// returns nil.
// See Verify for more information.
func VerifyIfEnabled(cfg Config) error {
	if os.Getenv(ENV_VERIFY) != ENV_VERIFY_ALL_VALUE {
		// Verification is opt-in.
		return nil
	}
	return Verify(cfg)
}

// MustVerifyIfEnabled performs verification according to ETCD_VERIFY env
// settings and terminates the process when a problem is found.
// See Verify for more information.
//
// A failure is reported through cfg.Logger.Fatal. Verify accepts a nil
// cfg.Logger, so this function does as well: with a nil logger the failure is
// written to standard error and the process exits with status 1, rather than
// crashing with a nil-pointer dereference.
func MustVerifyIfEnabled(cfg Config) {
	err := VerifyIfEnabled(cfg)
	if err == nil {
		return
	}
	if cfg.Logger == nil {
		// No logger to report through; still surface the failure and exit.
		fmt.Fprintf(os.Stderr, "Verification failed: data-dir=%q error=%v\n", cfg.DataDir, err)
		os.Exit(1)
	}
	cfg.Logger.Fatal("Verification failed",
		zap.String("data-dir", cfg.DataDir),
		zap.Error(err))
}

// validateSchema detects the data schema version stored in the backend and
// returns an error unless it equals version.V3_4.
// The read transaction is held under its read lock for the duration of the
// detection.
func validateSchema(lg *zap.Logger, be backend.Backend) error {
	be.ReadTx().RLock()
	defer be.ReadTx().RUnlock()

	detected := mvcc.UnsafeDetectSchemaVersion(lg, be.ReadTx())
	if detected.Equal(version.V3_4) {
		return nil
	}
	return fmt.Errorf("detected unsupported data schema: %s", detected.String())
}

// toBackendFileName returns the path of the backend db file inside the given
// data directory: <dataDir>/member/snap/db.
func toBackendFileName(dataDir string) string {
	snapDir := filepath.Join(dataDir, "member", "snap")
	return filepath.Join(snapDir, "db")
}
11 changes: 11 additions & 0 deletions integration/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ import (
lockpb "go.etcd.io/etcd/etcdserver/api/v3lock/v3lockpb"
"go.etcd.io/etcd/etcdserver/api/v3rpc"
pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
"go.etcd.io/etcd/etcdserver/verify"
"go.etcd.io/etcd/pkg/logutil"
"go.etcd.io/etcd/pkg/testutil"
"go.etcd.io/etcd/pkg/tlsutil"
Expand Down Expand Up @@ -570,6 +571,7 @@ type member struct {
useIP bool

isLearner bool
closed bool
}

func (m *member) GRPCAddr() string { return m.grpcAddr }
Expand Down Expand Up @@ -1045,6 +1047,15 @@ func (m *member) Close() {
for _, f := range m.serverClosers {
f()
}
if !m.closed {
// Avoid verification of the same file multiple times
// (that might not exist any longer)
verify.MustVerifyIfEnabled(verify.Config{
Logger: m.Logger,
DataDir: m.DataDir,
})
}
m.closed = true
}

// Stop stops the member, but the data dir of the member is preserved.
Expand Down
2 changes: 2 additions & 0 deletions test
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
# $ COVERDIR=coverage PASSES="build_cov cov" ./test
set -euo pipefail

export ETCD_VERIFY=all

source ./build

PASSES=${PASSES:-}
Expand Down

0 comments on commit 4b197f6

Please sign in to comment.