From 5e4c2a6cd9e34a72a8a0a59832cbf4e607e130b2 Mon Sep 17 00:00:00 2001
From: Xun Jiang
Date: Tue, 2 Jul 2024 15:12:08 +0800
Subject: [PATCH] Set DualModeLogger for both backup prepare and run stages.

The dual mode logger (which writes logs to both stdout and a file) was used
only for the backup run stage. As a result, errors raised during the backup
preparation stage were not recorded for the backup.

Signed-off-by: Xun Jiang
---
 pkg/controller/backup_controller.go      | 108 +++++++++++++++--------
 pkg/controller/backup_controller_test.go |  11 +--
 pkg/util/logging/dual_mode_logger.go     |  33 ++++++-
 pkg/util/logging/log_counter_hook.go     |  16 ++--
 4 files changed, 115 insertions(+), 53 deletions(-)

diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go
index 8ccd8db188..c39b12edc2 100644
--- a/pkg/controller/backup_controller.go
+++ b/pkg/controller/backup_controller.go
@@ -238,8 +238,21 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
         return ctrl.Result{}, nil
     }
 
+    // Log the backup to both a backup log file and to stdout. This will help see what happened if the upload of the
+    // backup log failed for whatever reason.
+    backupLog, err := logging.NewTempFileLogger(
+        b.backupLogLevel,
+        b.formatFlag,
+        logging.NewLogHook(),
+        logrus.Fields{Backup: kubeutil.NamespaceAndName(original)},
+    )
+    if err != nil {
+        return ctrl.Result{}, errors.Wrap(err, "error creating dual mode logger for backup")
+    }
+    defer backupLog.Dispose(b.logger.WithField(Backup, kubeutil.NamespaceAndName(original)))
+
     log.Debug("Preparing backup request")
-    request := b.prepareBackupRequest(original, log)
+    request := b.prepareBackupRequest(original, backupLog)
     if len(request.Status.ValidationErrors) > 0 {
         request.Status.Phase = velerov1api.BackupPhaseFailedValidation
     } else {
@@ -278,7 +291,7 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
     b.metrics.RegisterBackupAttempt(backupScheduleName)
 
     // execution & upload of backup
-    if err := b.runBackup(request); err != nil {
+    if err := b.runBackup(request, backupLog); err != nil {
         // even though runBackup sets the backup's phase prior
         // to uploading artifacts to object storage, we have to
         // check for an error again here and update the phase if
@@ -464,7 +477,11 @@ func (b *backupReconciler) prepareBackupRequest(backup *velerov1api.Backup, logg
     }
 
     // validate the included/excluded namespaces
-    for _, err := range b.validateNamespaceIncludesExcludes(request.Spec.IncludedNamespaces, request.Spec.ExcludedNamespaces) {
+    for _, err := range b.validateNamespaceIncludesExcludes(
+        request.Spec.IncludedNamespaces,
+        request.Spec.ExcludedNamespaces,
+        logger,
+    ) {
         request.Status.ValidationErrors = append(request.Status.ValidationErrors, fmt.Sprintf("Invalid included/excluded namespace lists: %v", err))
     }
 
@@ -596,7 +613,7 @@ func (b *backupReconciler) validateAndGetSnapshotLocations(backup *velerov1api.B
     return providerLocations, nil
 }
 
-func (b *backupReconciler) validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces []string) []error {
+func (b *backupReconciler) validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces []string, logger logrus.FieldLogger) []error {
     var errs []error
     if errs = collections.ValidateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces); len(errs) > 0 {
         return errs
@@ -608,7 +625,12 @@ func (b *backupReconciler) validateNamespaceIncludesExcludes(includedNamespaces,
             continue
         }
         if err := b.kbClient.Get(context.Background(), kbclient.ObjectKey{Name: name}, namespace); err != nil {
-            errs = append(errs, err)
+            if apierrors.IsNotFound(err) {
+                logger.Errorf("fail to get namespace %s specified in the includedNamespaces %s",
+                    name, strings.Join(includedNamespaces, ","))
+            } else {
+                errs = append(errs, err)
+            }
         }
     }
     return errs
@@ -618,36 +640,28 @@ func (b *backupReconciler) validateNamespaceIncludesExcludes(includedNamespaces,
 // causes the backup to be Failed; if no error is returned, the backup's status's Errors
 // field is checked to see if the backup was a partial failure.
-func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
-    b.logger.WithField(Backup, kubeutil.NamespaceAndName(backup)).Info("Setting up backup log")
-
-    // Log the backup to both a backup log file and to stdout. This will help see what happened if the upload of the
-    // backup log failed for whatever reason.
-    logCounter := logging.NewLogHook()
-    backupLog, err := logging.NewTempFileLogger(b.backupLogLevel, b.formatFlag, logCounter, logrus.Fields{Backup: kubeutil.NamespaceAndName(backup)})
-    if err != nil {
-        return errors.Wrap(err, "error creating dual mode logger for backup")
-    }
-    defer backupLog.Dispose(b.logger.WithField(Backup, kubeutil.NamespaceAndName(backup)))
-
-    backupLog.Info("Setting up backup temp file")
+func (b *backupReconciler) runBackup(
+    backup *pkgbackup.Request,
+    logger logging.DualModeLogger,
+) error {
+    logger.Info("Setting up backup temp file")
     backupFile, err := os.CreateTemp("", "")
     if err != nil {
         return errors.Wrap(err, "error creating temp file for backup")
     }
-    defer closeAndRemoveFile(backupFile, backupLog)
+    defer closeAndRemoveFile(backupFile, logger)
 
-    backupLog.Info("Setting up plugin manager")
-    pluginManager := b.newPluginManager(backupLog)
+    logger.Info("Setting up plugin manager")
+    pluginManager := b.newPluginManager(logger)
     defer pluginManager.CleanupClients()
 
-    backupLog.Info("Getting backup item actions")
+    logger.Info("Getting backup item actions")
     actions, err := pluginManager.GetBackupItemActionsV2()
     if err != nil {
         return err
     }
 
-    backupLog.Info("Setting up backup store to check for backup existence")
-    backupStore, err := b.backupStoreGetter.Get(backup.StorageLocation, pluginManager, backupLog)
+    logger.Info("Setting up backup store to check for backup existence")
+    backupStore, err := b.backupStoreGetter.Get(backup.StorageLocation, pluginManager, logger)
     if err != nil {
         return err
     }
@@ -665,7 +679,7 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
     backupItemActionsResolver := framework.NewBackupItemActionResolverV2(actions)
 
     var fatalErrs []error
-    if err := b.backupper.BackupWithResolvers(backupLog, backup, backupFile, backupItemActionsResolver, pluginManager); err != nil {
+    if err := b.backupper.BackupWithResolvers(logger, backup, backupFile, backupItemActionsResolver, pluginManager); err != nil {
         fatalErrs = append(fatalErrs, err)
     }
 
@@ -677,7 +691,12 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
             backup.Status.VolumeSnapshotsCompleted++
         }
     }
-    volumeSnapshots, volumeSnapshotContents, volumeSnapshotClasses := pkgbackup.GetBackupCSIResources(b.kbClient, b.globalCRClient, backup.Backup, backupLog)
+    volumeSnapshots, volumeSnapshotContents, volumeSnapshotClasses := pkgbackup.GetBackupCSIResources(
+        b.kbClient,
+        b.globalCRClient,
+        backup.Backup,
+        logger,
+    )
 
     // Update CSIVolumeSnapshotsAttempted
     backup.Status.CSIVolumeSnapshotsAttempted = len(volumeSnapshots)
@@ -688,7 +707,7 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
     inProgressOperations, _, opsCompleted, opsFailed, errs := getBackupItemOperationProgress(backup.Backup, pluginManager, *backup.GetItemOperationsList())
     if len(errs) > 0 {
         for _, err := range errs {
-            backupLog.Error(err)
+            logger.Error(err)
         }
     }
 
@@ -696,17 +715,17 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
     backup.Status.BackupItemOperationsCompleted = opsCompleted
     backup.Status.BackupItemOperationsFailed = opsFailed
 
-    backup.Status.Warnings = logCounter.GetCount(logrus.WarnLevel)
-    backup.Status.Errors = logCounter.GetCount(logrus.ErrorLevel)
+    backup.Status.Warnings = logger.GetLogEntryCountByLevel(logrus.WarnLevel)
+    backup.Status.Errors = logger.GetLogEntryCountByLevel(logrus.ErrorLevel)
 
-    backupWarnings := logCounter.GetEntries(logrus.WarnLevel)
-    backupErrors := logCounter.GetEntries(logrus.ErrorLevel)
+    backupWarnings := logger.GetLogEntryByLevel(logrus.WarnLevel)
+    backupErrors := logger.GetLogEntryByLevel(logrus.ErrorLevel)
     results := map[string]results.Result{
         "warnings": backupWarnings,
         "errors":   backupErrors,
     }
 
-    backupLog.DoneForPersist(b.logger.WithField(Backup, kubeutil.NamespaceAndName(backup)))
+    logger.DoneForPersist(b.logger.WithField(Backup, kubeutil.NamespaceAndName(backup)))
 
     // Assign finalize phase as close to end as possible so that any errors
     // logged to backupLog are captured. This is done before uploading the
@@ -715,7 +734,7 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
     switch {
     case len(fatalErrs) > 0:
         backup.Status.Phase = velerov1api.BackupPhaseFailed
-    case logCounter.GetCount(logrus.ErrorLevel) > 0:
+    case logger.GetLogEntryCountByLevel(logrus.ErrorLevel) > 0:
         if inProgressOperations {
             backup.Status.Phase = velerov1api.BackupPhaseWaitingForPluginOperationsPartiallyFailed
         } else {
@@ -735,25 +754,36 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
         backup.Status.Phase == velerov1api.BackupPhaseCompleted {
         backup.Status.CompletionTimestamp = &metav1.Time{Time: b.clock.Now()}
     }
-    recordBackupMetrics(backupLog, backup.Backup, backupFile, b.metrics, false)
+    recordBackupMetrics(logger, backup.Backup, backupFile, b.metrics, false)
 
     // re-instantiate the backup store because credentials could have changed since the original
     // instantiation, if this was a long-running backup
-    backupLog.Info("Setting up backup store to persist the backup")
-    backupStore, err = b.backupStoreGetter.Get(backup.StorageLocation, pluginManager, backupLog)
+    logger.Info("Setting up backup store to persist the backup")
+    backupStore, err = b.backupStoreGetter.Get(backup.StorageLocation, pluginManager, logger)
     if err != nil {
         return err
     }
 
-    if logFile, err := backupLog.GetPersistFile(); err != nil {
+    if logFile, err := logger.GetPersistFile(); err != nil {
         fatalErrs = append(fatalErrs, errors.Wrap(err, "error getting backup log file"))
     } else {
-        if errs := persistBackup(backup, backupFile, logFile, backupStore, volumeSnapshots, volumeSnapshotContents, volumeSnapshotClasses, results, b.globalCRClient, backupLog); len(errs) > 0 {
+        if errs := persistBackup(
+            backup,
+            backupFile,
+            logFile,
+            backupStore,
+            volumeSnapshots,
+            volumeSnapshotContents,
+            volumeSnapshotClasses,
+            results,
+            b.globalCRClient,
+            logger,
+        ); len(errs) > 0 {
             fatalErrs = append(fatalErrs, errs...)
         }
     }
 
-    b.logger.WithField(Backup, kubeutil.NamespaceAndName(backup)).Infof("Initial backup processing complete, moving to %s", backup.Status.Phase)
+    logger.WithField(Backup, kubeutil.NamespaceAndName(backup)).Infof("Initial backup processing complete, moving to %s", backup.Status.Phase)
 
     // if we return a non-nil error, the calling function will update
     // the backup's phase to Failed.
diff --git a/pkg/controller/backup_controller_test.go b/pkg/controller/backup_controller_test.go
index 2f613af2cb..92db75a81c 100644
--- a/pkg/controller/backup_controller_test.go
+++ b/pkg/controller/backup_controller_test.go
@@ -1575,6 +1575,7 @@ func TestValidateAndGetSnapshotLocations(t *testing.T) {
 }
 
 func TestValidateNamespaceIncludesExcludes(t *testing.T) {
+    logger := logging.DefaultLogger(logrus.DebugLevel, logging.FormatText)
     namespace := builder.ForNamespace("default").Result()
     reconciler := &backupReconciler{
         kbClient: velerotest.NewFakeControllerRuntimeClient(t, namespace),
@@ -1583,31 +1584,31 @@ func TestValidateNamespaceIncludesExcludes(t *testing.T) {
     // empty string as includedNamespaces
     includedNamespaces := []string{""}
     excludedNamespaces := []string{"test"}
-    errs := reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces)
+    errs := reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces, logger)
     assert.Empty(t, errs)
 
     // "*" as includedNamespaces
     includedNamespaces = []string{"*"}
     excludedNamespaces = []string{"test"}
-    errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces)
+    errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces, logger)
     assert.Empty(t, errs)
 
     // invalid namespaces
     includedNamespaces = []string{"1@#"}
     excludedNamespaces = []string{"2@#"}
-    errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces)
+    errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces, logger)
     assert.Len(t, errs, 2)
 
     // not exist namespaces
     includedNamespaces = []string{"non-existing-namespace"}
     excludedNamespaces = []string{}
-    errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces)
+    errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces, logger)
     assert.Len(t, errs, 1)
 
     // valid namespaces
     includedNamespaces = []string{"default"}
     excludedNamespaces = []string{}
-    errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces)
+    errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces, logger)
     assert.Empty(t, errs)
 }
diff --git a/pkg/util/logging/dual_mode_logger.go b/pkg/util/logging/dual_mode_logger.go
index f5533c8eb0..56dacca4fa 100644
--- a/pkg/util/logging/dual_mode_logger.go
+++ b/pkg/util/logging/dual_mode_logger.go
@@ -23,6 +23,7 @@ import (
 
     "github.com/pkg/errors"
     "github.com/sirupsen/logrus"
+    "github.com/vmware-tanzu/velero/pkg/util/results"
 )
 
 // DualModeLogger is a thread safe logger interface to write logs to dual targets, one of which
@@ -35,6 +36,10 @@ type DualModeLogger interface {
     GetPersistFile() (*os.File, error)
     // Dispose closes the temp file pointer and removes the file
     Dispose(log logrus.FieldLogger)
+
+    GetLogEntryByLevel(level logrus.Level) results.Result
+
+    GetLogEntryCountByLevel(level logrus.Level) int
 }
 
 type tempFileLogger struct {
@@ -45,7 +50,7 @@ type tempFileLogger struct {
 }
 
 // NewTempFileLogger creates a DualModeLogger instance that writes logs to both Stdout and a file in the temp folder.
-func NewTempFileLogger(logLevel logrus.Level, logFormat Format, hook *LogHook, fields logrus.Fields) (DualModeLogger, error) {
+func NewTempFileLogger(logLevel logrus.Level, logFormat Format, hook *LogCountHook, fields logrus.Fields) (DualModeLogger, error) {
     file, err := os.CreateTemp("", "")
     if err != nil {
         return nil, errors.Wrap(err, "error creating temp file")
     }
@@ -101,3 +106,29 @@ func closeAndRemoveFile(file *os.File, log logrus.FieldLogger) {
         log.WithError(err).WithField("file", file.Name()).Warn("error removing temp log file")
     }
 }
+
+func (p *tempFileLogger) GetLogEntryCountByLevel(level logrus.Level) int {
+    levelHooks := p.logger.Hooks[level]
+    for _, hook := range levelHooks {
+        logCountHook, ok := hook.(*LogCountHook)
+        if ok {
+            return logCountHook.GetCount(level)
+        }
+    }
+
+    return 0
+}
+
+func (p *tempFileLogger) GetLogEntryByLevel(level logrus.Level) results.Result {
+    var result results.Result
+
+    levelHooks := p.logger.Hooks[level]
+    for _, hook := range levelHooks {
+        logCountHook, ok := hook.(*LogCountHook)
+        if ok {
+            return logCountHook.GetEntries(level)
+        }
+    }
+
+    return result
+}
diff --git a/pkg/util/logging/log_counter_hook.go b/pkg/util/logging/log_counter_hook.go
index 63c9342df3..8f2409bc1d 100644
--- a/pkg/util/logging/log_counter_hook.go
+++ b/pkg/util/logging/log_counter_hook.go
@@ -26,30 +26,30 @@ import (
     "github.com/vmware-tanzu/velero/pkg/util/results"
 )
 
-// LogHook is a logrus hook that counts the number of log
+// LogCountHook is a logrus hook that counts the number of log
 // statements that have been written at each logrus level. It also
 // maintains log entries at each logrus level in result structure.
-type LogHook struct {
+type LogCountHook struct {
     mu      sync.RWMutex
     counts  map[logrus.Level]int
     entries map[logrus.Level]*results.Result
 }
 
 // NewLogHook returns a pointer to an initialized LogHook.
-func NewLogHook() *LogHook {
-    return &LogHook{
+func NewLogHook() *LogCountHook {
+    return &LogCountHook{
         counts:  make(map[logrus.Level]int),
         entries: make(map[logrus.Level]*results.Result),
     }
 }
 
 // Levels returns the logrus levels that the hook should be fired for.
-func (h *LogHook) Levels() []logrus.Level {
+func (h *LogCountHook) Levels() []logrus.Level {
     return logrus.AllLevels
 }
 
 // Fire executes the hook's logic.
-func (h *LogHook) Fire(entry *logrus.Entry) error {
+func (h *LogCountHook) Fire(entry *logrus.Entry) error {
     h.mu.Lock()
     defer h.mu.Unlock()
 
@@ -93,7 +93,7 @@ func (h *LogHook) Fire(entry *logrus.Entry) error {
 
 // GetCount returns the number of log statements that have been
 // written at the specific level provided.
-func (h *LogHook) GetCount(level logrus.Level) int {
+func (h *LogCountHook) GetCount(level logrus.Level) int {
     h.mu.RLock()
     defer h.mu.RUnlock()
 
@@ -102,7 +102,7 @@ func (h *LogHook) GetCount(level logrus.Level) int {
 
 // GetEntries returns the log statements that have been
 // written at the specific level provided.
-func (h *LogHook) GetEntries(level logrus.Level) results.Result {
+func (h *LogCountHook) GetEntries(level logrus.Level) results.Result {
     h.mu.RLock()
     defer h.mu.RUnlock()
     response, isPresent := h.entries[level]
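
For readers skimming the diff, the sketch below is not part of the patch; it is a minimal illustration of how the reworked DualModeLogger is meant to be shared across stages: created once before validation, passed to both prepareBackupRequest and runBackup, and queried for warning/error totals through the new GetLogEntryCountByLevel method instead of a locally held LogCountHook. It assumes a Velero source tree that already contains this change; the log level, field key, and backup name are illustrative placeholders.

```go
package main

import (
	"github.com/sirupsen/logrus"

	"github.com/vmware-tanzu/velero/pkg/util/logging"
)

func main() {
	// Base process logger, as used elsewhere in Velero.
	base := logging.DefaultLogger(logrus.InfoLevel, logging.FormatText)

	// Create the dual mode logger once, before validation, so that both the
	// prepare and run stages write to the same temp file and to stdout.
	// The "backup" field key and backup name are illustrative values.
	backupLog, err := logging.NewTempFileLogger(
		logrus.InfoLevel,
		logging.FormatText,
		logging.NewLogHook(),
		logrus.Fields{"backup": "velero/my-backup"},
	)
	if err != nil {
		base.WithError(err).Fatal("error creating dual mode logger")
	}
	defer backupLog.Dispose(base)

	// Messages from either stage land in the same log file and counters.
	backupLog.Error("namespace not found during validation") // prepare stage
	backupLog.Warn("volume snapshot skipped")                // run stage

	// Flush the file target, then read totals via the new interface methods.
	backupLog.DoneForPersist(base)
	base.Infof("errors=%d warnings=%d",
		backupLog.GetLogEntryCountByLevel(logrus.ErrorLevel),
		backupLog.GetLogEntryCountByLevel(logrus.WarnLevel),
	)
}
```

Because the hook is now reached through the logger itself, callers such as runBackup no longer need the *LogCountHook handle, which is what allows the logger to be constructed in Reconcile and threaded through both stages.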