From 1312bf5c8619bb956039dfb49a40a2b73c3e9c69 Mon Sep 17 00:00:00 2001 From: Siyuan Zhang Date: Tue, 6 Feb 2024 10:01:25 -0800 Subject: [PATCH] Add schema verification when closing etcd. Signed-off-by: Siyuan Zhang --- embed/etcd.go | 5 ++ etcdserver/verify/doc.go | 20 +++++++ etcdserver/verify/verify.go | 111 ++++++++++++++++++++++++++++++++++++ integration/cluster.go | 11 ++++ test | 2 + 5 files changed, 149 insertions(+) create mode 100644 etcdserver/verify/doc.go create mode 100644 etcdserver/verify/verify.go diff --git a/embed/etcd.go b/embed/etcd.go index fab7290e1a80..f4645c88de94 100644 --- a/embed/etcd.go +++ b/embed/etcd.go @@ -38,6 +38,7 @@ import ( "go.etcd.io/etcd/etcdserver/api/v2v3" "go.etcd.io/etcd/etcdserver/api/v3client" "go.etcd.io/etcd/etcdserver/api/v3rpc" + "go.etcd.io/etcd/etcdserver/verify" "go.etcd.io/etcd/pkg/debugutil" runtimeutil "go.etcd.io/etcd/pkg/runtime" "go.etcd.io/etcd/pkg/transport" @@ -374,6 +375,10 @@ func (e *Etcd) Close() { lg.Info("closing etcd server", fields...) } defer func() { + verify.MustVerifyIfEnabled(verify.Config{ + Logger: lg, + DataDir: e.cfg.Dir, + }) if lg != nil { lg.Info("closed etcd server", fields...) lg.Sync() diff --git a/etcdserver/verify/doc.go b/etcdserver/verify/doc.go new file mode 100644 index 000000000000..2c42bf6f1985 --- /dev/null +++ b/etcdserver/verify/doc.go @@ -0,0 +1,20 @@ +// Copyright 2021 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package verify + +// verify package is analyzing persistent state of etcd to find potential +// inconsistencies. +// In particular it covers cross-checking between different aspacts of etcd +// storage like WAL & Backend. diff --git a/etcdserver/verify/verify.go b/etcdserver/verify/verify.go new file mode 100644 index 000000000000..c913881e867c --- /dev/null +++ b/etcdserver/verify/verify.go @@ -0,0 +1,111 @@ +// Copyright 2021 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package verify + +import ( + "fmt" + "os" + "path/filepath" + + "go.etcd.io/etcd/mvcc" + "go.etcd.io/etcd/mvcc/backend" + "go.etcd.io/etcd/version" + "go.uber.org/zap" +) + +const ENV_VERIFY = "ETCD_VERIFY" +const ENV_VERIFY_ALL_VALUE = "all" + +type Config struct { + // DataDir is a root directory where the data being verified are stored. + DataDir string + + Logger *zap.Logger +} + +// Verify performs consistency checks of given etcd data-directory. +// The errors are reported as the returned error, but for some situations +// the function can also panic. +// The function is expected to work on not-in-use data model, i.e. +// no file-locks should be taken. Verify does not modified the data. +func Verify(cfg Config) error { + lg := cfg.Logger + if lg == nil { + lg = zap.NewNop() + } + + var err error + lg.Info("verification of persisted state", zap.String("data-dir", cfg.DataDir)) + defer func() { + if err != nil { + lg.Error("verification of persisted state failed", + zap.String("data-dir", cfg.DataDir), + zap.Error(err)) + } else if r := recover(); r != nil { + lg.Error("verification of persisted state failed", + zap.String("data-dir", cfg.DataDir)) + panic(r) + } else { + lg.Info("verification of persisted state successful", zap.String("data-dir", cfg.DataDir)) + } + }() + + beConfig := backend.DefaultBackendConfig() + beConfig.Path = toBackendFileName(cfg.DataDir) + beConfig.Logger = cfg.Logger + + be := backend.New(beConfig) + defer be.Close() + + err = validateSchema(lg, be) + return err +} + +// VerifyIfEnabled performs verification according to ETCD_VERIFY env settings. +// See Verify for more information. +func VerifyIfEnabled(cfg Config) error { + if os.Getenv(ENV_VERIFY) == ENV_VERIFY_ALL_VALUE { + return Verify(cfg) + } + return nil +} + +// MustVerifyIfEnabled performs verification according to ETCD_VERIFY env settings +// and exits in case of found problems. +// See Verify for more information. +func MustVerifyIfEnabled(cfg Config) { + if err := VerifyIfEnabled(cfg); err != nil { + cfg.Logger.Fatal("Verification failed", + zap.String("data-dir", cfg.DataDir), + zap.Error(err)) + } +} + +func validateSchema(lg *zap.Logger, be backend.Backend) error { + be.ReadTx().RLock() + defer be.ReadTx().RUnlock() + v, err := mvcc.UnsafeDetectSchemaVersion(lg, be.ReadTx()) + if err != nil { + return err + } + if !v.Equal(version.V3_4) { + return fmt.Errorf("detected unsupported data schema: %s", v.String()) + } + return nil +} + +func toBackendFileName(dataDir string) string { + return filepath.Join(dataDir, "member", "snap", "db") +} diff --git a/integration/cluster.go b/integration/cluster.go index 11d44c965417..7fc1ee369d0d 100644 --- a/integration/cluster.go +++ b/integration/cluster.go @@ -47,6 +47,7 @@ import ( lockpb "go.etcd.io/etcd/etcdserver/api/v3lock/v3lockpb" "go.etcd.io/etcd/etcdserver/api/v3rpc" pb "go.etcd.io/etcd/etcdserver/etcdserverpb" + "go.etcd.io/etcd/etcdserver/verify" "go.etcd.io/etcd/pkg/logutil" "go.etcd.io/etcd/pkg/testutil" "go.etcd.io/etcd/pkg/tlsutil" @@ -570,6 +571,7 @@ type member struct { useIP bool isLearner bool + closed bool } func (m *member) GRPCAddr() string { return m.grpcAddr } @@ -1045,6 +1047,15 @@ func (m *member) Close() { for _, f := range m.serverClosers { f() } + if !m.closed { + // Avoid verification of the same file multiple times + // (that might not exist any longer) + verify.MustVerifyIfEnabled(verify.Config{ + Logger: m.Logger, + DataDir: m.DataDir, + }) + } + m.closed = true } // Stop stops the member, but the data dir of the member is preserved. diff --git a/test b/test index d90f1133e3f2..6a916687c635 100755 --- a/test +++ b/test @@ -33,6 +33,8 @@ # $ COVERDIR=coverage PASSES="build_cov cov" ./test set -euo pipefail +export ETCD_VERIFY=all + source ./build PASSES=${PASSES:-}