From 1b3688f38e2e744a20e9ff35aad26c6cd0823f15 Mon Sep 17 00:00:00 2001 From: Siyuan Zhang Date: Thu, 8 Feb 2024 13:06:36 -0800 Subject: [PATCH] Add schema verification when closing etcd. Signed-off-by: Siyuan Zhang --- embed/etcd.go | 5 ++ etcdserver/verify/doc.go | 20 +++++++ etcdserver/verify/verify.go | 114 ++++++++++++++++++++++++++++++++++++ integration/cluster.go | 11 ++++ test | 2 + 5 files changed, 152 insertions(+) create mode 100644 etcdserver/verify/doc.go create mode 100644 etcdserver/verify/verify.go diff --git a/embed/etcd.go b/embed/etcd.go index 6ad2f065f822..a09e8e5da1cd 100644 --- a/embed/etcd.go +++ b/embed/etcd.go @@ -38,6 +38,7 @@ import ( "go.etcd.io/etcd/etcdserver/api/v2v3" "go.etcd.io/etcd/etcdserver/api/v3client" "go.etcd.io/etcd/etcdserver/api/v3rpc" + "go.etcd.io/etcd/etcdserver/verify" "go.etcd.io/etcd/pkg/debugutil" runtimeutil "go.etcd.io/etcd/pkg/runtime" "go.etcd.io/etcd/pkg/transport" @@ -376,6 +377,10 @@ func (e *Etcd) Close() { defer func() { if lg != nil { lg.Info("closed etcd server", fields...) + verify.MustVerifyIfEnabled(verify.Config{ + Logger: lg, + DataDir: e.cfg.Dir, + }) lg.Sync() } }() diff --git a/etcdserver/verify/doc.go b/etcdserver/verify/doc.go new file mode 100644 index 000000000000..2c42bf6f1985 --- /dev/null +++ b/etcdserver/verify/doc.go @@ -0,0 +1,20 @@ +// Copyright 2021 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package verify + +// verify package is analyzing persistent state of etcd to find potential +// inconsistencies. +// In particular it covers cross-checking between different aspacts of etcd +// storage like WAL & Backend. diff --git a/etcdserver/verify/verify.go b/etcdserver/verify/verify.go new file mode 100644 index 000000000000..28302a54aef0 --- /dev/null +++ b/etcdserver/verify/verify.go @@ -0,0 +1,114 @@ +// Copyright 2021 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package verify + +import ( + "fmt" + "os" + "path/filepath" + + "go.etcd.io/etcd/mvcc" + "go.etcd.io/etcd/mvcc/backend" + "go.etcd.io/etcd/pkg/fileutil" + "go.etcd.io/etcd/version" + "go.uber.org/zap" +) + +const ENV_VERIFY = "ETCD_VERIFY" +const ENV_VERIFY_ALL_VALUE = "all" + +type Config struct { + // DataDir is a root directory where the data being verified are stored. + DataDir string + + Logger *zap.Logger +} + +// Verify performs consistency checks of given etcd data-directory. +// The errors are reported as the returned error, but for some situations +// the function can also panic. +// The function is expected to work on not-in-use data model, i.e. +// no file-locks should be taken. Verify does not modified the data. +func Verify(cfg Config) error { + lg := cfg.Logger + if lg == nil { + lg = zap.NewNop() + } + + if !fileutil.Exist(toBackendFileName(cfg.DataDir)) { + lg.Info("verification skipped due to non exist db file") + return nil + } + + var err error + lg.Info("verification of persisted state", zap.String("data-dir", cfg.DataDir)) + defer func() { + if err != nil { + lg.Error("verification of persisted state failed", + zap.String("data-dir", cfg.DataDir), + zap.Error(err)) + } else if r := recover(); r != nil { + lg.Error("verification of persisted state failed", + zap.String("data-dir", cfg.DataDir)) + panic(r) + } else { + lg.Info("verification of persisted state successful", zap.String("data-dir", cfg.DataDir)) + } + }() + + beConfig := backend.DefaultBackendConfig() + beConfig.Path = toBackendFileName(cfg.DataDir) + beConfig.Logger = cfg.Logger + + be := backend.New(beConfig) + defer be.Close() + + err = validateSchema(lg, be) + return err +} + +// VerifyIfEnabled performs verification according to ETCD_VERIFY env settings. +// See Verify for more information. +func VerifyIfEnabled(cfg Config) error { + if os.Getenv(ENV_VERIFY) == ENV_VERIFY_ALL_VALUE { + return Verify(cfg) + } + return nil +} + +// MustVerifyIfEnabled performs verification according to ETCD_VERIFY env settings +// and exits in case of found problems. +// See Verify for more information. +func MustVerifyIfEnabled(cfg Config) { + if err := VerifyIfEnabled(cfg); err != nil { + cfg.Logger.Fatal("Verification failed", + zap.String("data-dir", cfg.DataDir), + zap.Error(err)) + } +} + +func validateSchema(lg *zap.Logger, be backend.Backend) error { + be.ReadTx().RLock() + defer be.ReadTx().RUnlock() + v := mvcc.UnsafeDetectSchemaVersion(lg, be.ReadTx()) + if !v.Equal(version.V3_4) { + return fmt.Errorf("detected unsupported data schema: %s", v.String()) + } + return nil +} + +func toBackendFileName(dataDir string) string { + return filepath.Join(dataDir, "member", "snap", "db") +} diff --git a/integration/cluster.go b/integration/cluster.go index 11d44c965417..7fc1ee369d0d 100644 --- a/integration/cluster.go +++ b/integration/cluster.go @@ -47,6 +47,7 @@ import ( lockpb "go.etcd.io/etcd/etcdserver/api/v3lock/v3lockpb" "go.etcd.io/etcd/etcdserver/api/v3rpc" pb "go.etcd.io/etcd/etcdserver/etcdserverpb" + "go.etcd.io/etcd/etcdserver/verify" "go.etcd.io/etcd/pkg/logutil" "go.etcd.io/etcd/pkg/testutil" "go.etcd.io/etcd/pkg/tlsutil" @@ -570,6 +571,7 @@ type member struct { useIP bool isLearner bool + closed bool } func (m *member) GRPCAddr() string { return m.grpcAddr } @@ -1045,6 +1047,15 @@ func (m *member) Close() { for _, f := range m.serverClosers { f() } + if !m.closed { + // Avoid verification of the same file multiple times + // (that might not exist any longer) + verify.MustVerifyIfEnabled(verify.Config{ + Logger: m.Logger, + DataDir: m.DataDir, + }) + } + m.closed = true } // Stop stops the member, but the data dir of the member is preserved. diff --git a/test b/test index d90f1133e3f2..6a916687c635 100755 --- a/test +++ b/test @@ -33,6 +33,8 @@ # $ COVERDIR=coverage PASSES="build_cov cov" ./test set -euo pipefail +export ETCD_VERIFY=all + source ./build PASSES=${PASSES:-}