From 9cb6fd527deff4e06cf2f167445bf5253cccaf49 Mon Sep 17 00:00:00 2001 From: Joe Betz Date: Wed, 12 Feb 2020 10:12:48 -0800 Subject: [PATCH] mvcc/backend: Delete orphaned db.tmp files before defrag --- etcdserver/api/snap/snapshotter.go | 19 +++++++++++++++++++ mvcc/backend/backend.go | 20 ++++++++++++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/etcdserver/api/snap/snapshotter.go b/etcdserver/api/snap/snapshotter.go index a933b3519b3d..87f33e59805e 100644 --- a/etcdserver/api/snap/snapshotter.go +++ b/etcdserver/api/snap/snapshotter.go @@ -206,6 +206,9 @@ func (s *Snapshotter) snapNames() ([]string, error) { if err != nil { return nil, err } + if err = s.cleanupSnapdir(names); err != nil { + return nil, err + } snaps := checkSuffix(s.lg, names) if len(snaps) == 0 { return nil, ErrNoSnapshot @@ -231,3 +234,19 @@ func checkSuffix(lg *zap.Logger, names []string) []string { } return snaps } + +// cleanupSnapdir removes any files that should not be in the snapshot directory: +// - db.tmp prefixed files that can be orphaned by defragmentation +func (s *Snapshotter) cleanupSnapdir(filenames []string) error { + for _, filename := range filenames { + if strings.HasPrefix(filename, "db.tmp") { + if s.lg != nil { + s.lg.Info("found orphaned defragmentation file; deleting", zap.String("path", filename)) + if rmErr := os.Remove(filepath.Join(s.dir, filename)); rmErr != nil && !os.IsNotExist(rmErr) { + return fmt.Errorf("failed to remove orphaned defragmentation file %s: %v", filename, rmErr) + } + } + } + } + return nil +} diff --git a/mvcc/backend/backend.go b/mvcc/backend/backend.go index 63a4a2e9998d..77e2d0dd986b 100644 --- a/mvcc/backend/backend.go +++ b/mvcc/backend/backend.go @@ -358,13 +358,24 @@ func (b *backend) defrag() error { b.batchTx.tx = nil - tmpdb, err := bolt.Open(b.db.Path()+".tmp", 0600, boltOpenOptions) + // Create a temporary file to ensure we start with a clean slate. + // Snapshotter.cleanupSnapdir cleans up any of these that are found during startup. + dir := filepath.Dir(b.db.Path()) + temp, err := ioutil.TempFile(dir, "db.tmp.*") + if err != nil { + return err + } + options := *boltOpenOptions + options.OpenFile = func(path string, i int, mode os.FileMode) (file *os.File, err error) { + return temp, nil + } + tdbp := temp.Name() + tmpdb, err := bolt.Open(tdbp, 0600, &options) if err != nil { return err } dbp := b.db.Path() - tdbp := tmpdb.Path() size1, sizeInUse1 := b.Size(), b.SizeInUse() if b.lg != nil { b.lg.Info( @@ -376,12 +387,12 @@ func (b *backend) defrag() error { zap.String("current-db-size-in-use", humanize.Bytes(uint64(sizeInUse1))), ) } - + // gofail: var defragBeforeCopy struct{} err = defragdb(b.db, tmpdb, defragLimit) if err != nil { tmpdb.Close() if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil { - b.lg.Error("failed to remove dirs under tmpdb", zap.Error(rmErr)) + b.lg.Error("failed to remove db.tmp after defragmentation completed", zap.Error(rmErr)) } return err } @@ -394,6 +405,7 @@ func (b *backend) defrag() error { if err != nil { b.lg.Fatal("failed to close tmp database", zap.Error(err)) } + // gofail: var defragBeforeRename struct{} err = os.Rename(tdbp, dbp) if err != nil { b.lg.Fatal("failed to rename tmp database", zap.Error(err))