Skip to content

Commit

Permalink
Set DualModeLogger for both backup prepare and run stages.
Browse files Browse the repository at this point in the history
The dual-mode logger (which writes logs to both stdout and a file)
was used only for the backup run stage. As a result, errors from the
backup preparation stage were not recorded in the backup log.

Signed-off-by: Xun Jiang <blackpigletbruce@gmail.com>
  • Loading branch information
blackpiglet committed Jul 2, 2024
1 parent 28d64c2 commit 5e4c2a6
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 53 deletions.
108 changes: 69 additions & 39 deletions pkg/controller/backup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,21 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
return ctrl.Result{}, nil
}

// Log the backup to both a backup log file and to stdout. This will help see what happened if the upload of the
// backup log failed for whatever reason.
backupLog, err := logging.NewTempFileLogger(
b.backupLogLevel,
b.formatFlag,
logging.NewLogHook(),
logrus.Fields{Backup: kubeutil.NamespaceAndName(original)},
)
if err != nil {
return ctrl.Result{}, errors.Wrap(err, "error creating dual mode logger for backup")
}
defer backupLog.Dispose(b.logger.WithField(Backup, kubeutil.NamespaceAndName(original)))

log.Debug("Preparing backup request")
request := b.prepareBackupRequest(original, log)
request := b.prepareBackupRequest(original, backupLog)
if len(request.Status.ValidationErrors) > 0 {
request.Status.Phase = velerov1api.BackupPhaseFailedValidation
} else {
Expand Down Expand Up @@ -278,7 +291,7 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
b.metrics.RegisterBackupAttempt(backupScheduleName)

// execution & upload of backup
if err := b.runBackup(request); err != nil {
if err := b.runBackup(request, backupLog); err != nil {
// even though runBackup sets the backup's phase prior
// to uploading artifacts to object storage, we have to
// check for an error again here and update the phase if
Expand Down Expand Up @@ -464,7 +477,11 @@ func (b *backupReconciler) prepareBackupRequest(backup *velerov1api.Backup, logg
}

// validate the included/excluded namespaces
for _, err := range b.validateNamespaceIncludesExcludes(request.Spec.IncludedNamespaces, request.Spec.ExcludedNamespaces) {
for _, err := range b.validateNamespaceIncludesExcludes(
request.Spec.IncludedNamespaces,
request.Spec.ExcludedNamespaces,
logger,
) {
request.Status.ValidationErrors = append(request.Status.ValidationErrors, fmt.Sprintf("Invalid included/excluded namespace lists: %v", err))
}

Expand Down Expand Up @@ -596,7 +613,7 @@ func (b *backupReconciler) validateAndGetSnapshotLocations(backup *velerov1api.B
return providerLocations, nil
}

func (b *backupReconciler) validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces []string) []error {
func (b *backupReconciler) validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces []string, logger logrus.FieldLogger) []error {
var errs []error
if errs = collections.ValidateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces); len(errs) > 0 {
return errs
Expand All @@ -608,7 +625,12 @@ func (b *backupReconciler) validateNamespaceIncludesExcludes(includedNamespaces,
continue
}
if err := b.kbClient.Get(context.Background(), kbclient.ObjectKey{Name: name}, namespace); err != nil {
errs = append(errs, err)
if apierrors.IsNotFound(err) {
logger.Errorf("fail to get namespace %s specified in the includedNamespaces %s",
name, strings.Join(includedNamespaces, ","))
} else {
errs = append(errs, err)
}
}
}
return errs
Expand All @@ -618,36 +640,28 @@ func (b *backupReconciler) validateNamespaceIncludesExcludes(includedNamespaces,
// causes the backup to be Failed; if no error is returned, the backup's status's Errors
// field is checked to see if the backup was a partial failure.

func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
b.logger.WithField(Backup, kubeutil.NamespaceAndName(backup)).Info("Setting up backup log")

// Log the backup to both a backup log file and to stdout. This will help see what happened if the upload of the
// backup log failed for whatever reason.
logCounter := logging.NewLogHook()
backupLog, err := logging.NewTempFileLogger(b.backupLogLevel, b.formatFlag, logCounter, logrus.Fields{Backup: kubeutil.NamespaceAndName(backup)})
if err != nil {
return errors.Wrap(err, "error creating dual mode logger for backup")
}
defer backupLog.Dispose(b.logger.WithField(Backup, kubeutil.NamespaceAndName(backup)))

backupLog.Info("Setting up backup temp file")
func (b *backupReconciler) runBackup(
backup *pkgbackup.Request,
logger logging.DualModeLogger,
) error {
logger.Info("Setting up backup temp file")
backupFile, err := os.CreateTemp("", "")
if err != nil {
return errors.Wrap(err, "error creating temp file for backup")
}
defer closeAndRemoveFile(backupFile, backupLog)
defer closeAndRemoveFile(backupFile, logger)

backupLog.Info("Setting up plugin manager")
pluginManager := b.newPluginManager(backupLog)
logger.Info("Setting up plugin manager")
pluginManager := b.newPluginManager(logger)
defer pluginManager.CleanupClients()

backupLog.Info("Getting backup item actions")
logger.Info("Getting backup item actions")
actions, err := pluginManager.GetBackupItemActionsV2()
if err != nil {
return err
}
backupLog.Info("Setting up backup store to check for backup existence")
backupStore, err := b.backupStoreGetter.Get(backup.StorageLocation, pluginManager, backupLog)
logger.Info("Setting up backup store to check for backup existence")
backupStore, err := b.backupStoreGetter.Get(backup.StorageLocation, pluginManager, logger)
if err != nil {
return err
}
Expand All @@ -665,7 +679,7 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
backupItemActionsResolver := framework.NewBackupItemActionResolverV2(actions)

var fatalErrs []error
if err := b.backupper.BackupWithResolvers(backupLog, backup, backupFile, backupItemActionsResolver, pluginManager); err != nil {
if err := b.backupper.BackupWithResolvers(logger, backup, backupFile, backupItemActionsResolver, pluginManager); err != nil {
fatalErrs = append(fatalErrs, err)
}

Expand All @@ -677,7 +691,12 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
backup.Status.VolumeSnapshotsCompleted++
}
}
volumeSnapshots, volumeSnapshotContents, volumeSnapshotClasses := pkgbackup.GetBackupCSIResources(b.kbClient, b.globalCRClient, backup.Backup, backupLog)
volumeSnapshots, volumeSnapshotContents, volumeSnapshotClasses := pkgbackup.GetBackupCSIResources(
b.kbClient,
b.globalCRClient,
backup.Backup,
logger,
)
// Update CSIVolumeSnapshotsAttempted
backup.Status.CSIVolumeSnapshotsAttempted = len(volumeSnapshots)

Expand All @@ -688,25 +707,25 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
inProgressOperations, _, opsCompleted, opsFailed, errs := getBackupItemOperationProgress(backup.Backup, pluginManager, *backup.GetItemOperationsList())
if len(errs) > 0 {
for _, err := range errs {
backupLog.Error(err)
logger.Error(err)
}
}

backup.Status.BackupItemOperationsAttempted = len(*backup.GetItemOperationsList())
backup.Status.BackupItemOperationsCompleted = opsCompleted
backup.Status.BackupItemOperationsFailed = opsFailed

backup.Status.Warnings = logCounter.GetCount(logrus.WarnLevel)
backup.Status.Errors = logCounter.GetCount(logrus.ErrorLevel)
backup.Status.Warnings = logger.GetLogEntryCountByLevel(logrus.WarnLevel)
backup.Status.Errors = logger.GetLogEntryCountByLevel(logrus.ErrorLevel)

backupWarnings := logCounter.GetEntries(logrus.WarnLevel)
backupErrors := logCounter.GetEntries(logrus.ErrorLevel)
backupWarnings := logger.GetLogEntryByLevel(logrus.WarnLevel)
backupErrors := logger.GetLogEntryByLevel(logrus.ErrorLevel)
results := map[string]results.Result{
"warnings": backupWarnings,
"errors": backupErrors,
}

backupLog.DoneForPersist(b.logger.WithField(Backup, kubeutil.NamespaceAndName(backup)))
logger.DoneForPersist(b.logger.WithField(Backup, kubeutil.NamespaceAndName(backup)))

// Assign finalize phase as close to end as possible so that any errors
// logged to backupLog are captured. This is done before uploading the
Expand All @@ -715,7 +734,7 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
switch {
case len(fatalErrs) > 0:
backup.Status.Phase = velerov1api.BackupPhaseFailed
case logCounter.GetCount(logrus.ErrorLevel) > 0:
case logger.GetLogEntryCountByLevel(logrus.ErrorLevel) > 0:
if inProgressOperations {
backup.Status.Phase = velerov1api.BackupPhaseWaitingForPluginOperationsPartiallyFailed
} else {
Expand All @@ -735,25 +754,36 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error {
backup.Status.Phase == velerov1api.BackupPhaseCompleted {
backup.Status.CompletionTimestamp = &metav1.Time{Time: b.clock.Now()}
}
recordBackupMetrics(backupLog, backup.Backup, backupFile, b.metrics, false)
recordBackupMetrics(logger, backup.Backup, backupFile, b.metrics, false)

// re-instantiate the backup store because credentials could have changed since the original
// instantiation, if this was a long-running backup
backupLog.Info("Setting up backup store to persist the backup")
backupStore, err = b.backupStoreGetter.Get(backup.StorageLocation, pluginManager, backupLog)
logger.Info("Setting up backup store to persist the backup")
backupStore, err = b.backupStoreGetter.Get(backup.StorageLocation, pluginManager, logger)
if err != nil {
return err
}

if logFile, err := backupLog.GetPersistFile(); err != nil {
if logFile, err := logger.GetPersistFile(); err != nil {
fatalErrs = append(fatalErrs, errors.Wrap(err, "error getting backup log file"))
} else {
if errs := persistBackup(backup, backupFile, logFile, backupStore, volumeSnapshots, volumeSnapshotContents, volumeSnapshotClasses, results, b.globalCRClient, backupLog); len(errs) > 0 {
if errs := persistBackup(
backup,
backupFile,
logFile,
backupStore,
volumeSnapshots,
volumeSnapshotContents,
volumeSnapshotClasses,
results,
b.globalCRClient,
logger,
); len(errs) > 0 {
fatalErrs = append(fatalErrs, errs...)
}
}

b.logger.WithField(Backup, kubeutil.NamespaceAndName(backup)).Infof("Initial backup processing complete, moving to %s", backup.Status.Phase)
logger.WithField(Backup, kubeutil.NamespaceAndName(backup)).Infof("Initial backup processing complete, moving to %s", backup.Status.Phase)

// if we return a non-nil error, the calling function will update
// the backup's phase to Failed.
Expand Down
11 changes: 6 additions & 5 deletions pkg/controller/backup_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1575,6 +1575,7 @@ func TestValidateAndGetSnapshotLocations(t *testing.T) {
}

func TestValidateNamespaceIncludesExcludes(t *testing.T) {
logger := logging.DefaultLogger(logrus.DebugLevel, logging.FormatText)
namespace := builder.ForNamespace("default").Result()
reconciler := &backupReconciler{
kbClient: velerotest.NewFakeControllerRuntimeClient(t, namespace),
Expand All @@ -1583,31 +1584,31 @@ func TestValidateNamespaceIncludesExcludes(t *testing.T) {
// empty string as includedNamespaces
includedNamespaces := []string{""}
excludedNamespaces := []string{"test"}
errs := reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces)
errs := reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces, logger)
assert.Empty(t, errs)

// "*" as includedNamespaces
includedNamespaces = []string{"*"}
excludedNamespaces = []string{"test"}
errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces)
errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces, logger)
assert.Empty(t, errs)

// invalid namespaces
includedNamespaces = []string{"1@#"}
excludedNamespaces = []string{"2@#"}
errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces)
errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces, logger)
assert.Len(t, errs, 2)

// not exist namespaces
includedNamespaces = []string{"non-existing-namespace"}
excludedNamespaces = []string{}
errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces)
errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces, logger)
assert.Len(t, errs, 1)

// valid namespaces
includedNamespaces = []string{"default"}
excludedNamespaces = []string{}
errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces)
errs = reconciler.validateNamespaceIncludesExcludes(includedNamespaces, excludedNamespaces, logger)
assert.Empty(t, errs)
}

Expand Down
33 changes: 32 additions & 1 deletion pkg/util/logging/dual_mode_logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/vmware-tanzu/velero/pkg/util/results"
)

// DualModeLogger is a thread safe logger interface to write logs to dual targets, one of which
Expand All @@ -35,6 +36,10 @@ type DualModeLogger interface {
GetPersistFile() (*os.File, error)
// Dispose closes the temp file pointer and removes the file
Dispose(log logrus.FieldLogger)

GetLogEntryByLevel(level logrus.Level) results.Result

GetLogEntryCountByLevel(level logrus.Level) int
}

type tempFileLogger struct {
Expand All @@ -45,7 +50,7 @@ type tempFileLogger struct {
}

// NewTempFileLogger creates a DualModeLogger instance that writes logs to both Stdout and a file in the temp folder.
func NewTempFileLogger(logLevel logrus.Level, logFormat Format, hook *LogHook, fields logrus.Fields) (DualModeLogger, error) {
func NewTempFileLogger(logLevel logrus.Level, logFormat Format, hook *LogCountHook, fields logrus.Fields) (DualModeLogger, error) {
file, err := os.CreateTemp("", "")
if err != nil {
return nil, errors.Wrap(err, "error creating temp file")
Expand Down Expand Up @@ -101,3 +106,29 @@ func closeAndRemoveFile(file *os.File, log logrus.FieldLogger) {
log.WithError(err).WithField("file", file.Name()).Warn("error removing temp log file")
}
}

// GetLogEntryCountByLevel returns the number of log statements written at
// the specified level, as recorded by the LogCountHook registered on the
// underlying logger. It returns 0 when no LogCountHook is installed for
// that level.
func (p *tempFileLogger) GetLogEntryCountByLevel(level logrus.Level) int {
	for _, hook := range p.logger.Hooks[level] {
		// Only the counting hook tracks per-level totals; ignore any
		// other hooks registered for this level.
		if countHook, ok := hook.(*LogCountHook); ok {
			return countHook.GetCount(level)
		}
	}

	return 0
}

// GetLogEntryByLevel returns the log entries captured at the specified
// level by the LogCountHook registered on the underlying logger. It
// returns an empty results.Result when no LogCountHook is installed for
// that level.
func (p *tempFileLogger) GetLogEntryByLevel(level logrus.Level) results.Result {
	for _, hook := range p.logger.Hooks[level] {
		// Only the counting hook retains the entries; ignore any other
		// hooks registered for this level.
		if countHook, ok := hook.(*LogCountHook); ok {
			return countHook.GetEntries(level)
		}
	}

	// No counting hook found: return the zero-value result rather than
	// keeping a separately declared accumulator that is never mutated.
	return results.Result{}
}
16 changes: 8 additions & 8 deletions pkg/util/logging/log_counter_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,30 +26,30 @@ import (
"github.com/vmware-tanzu/velero/pkg/util/results"
)

// LogHook is a logrus hook that counts the number of log
// LogCountHook is a logrus hook that counts the number of log
// statements that have been written at each logrus level. It also
maintains the log entries recorded at each logrus level in a results structure.
type LogHook struct {
type LogCountHook struct {
mu sync.RWMutex
counts map[logrus.Level]int
entries map[logrus.Level]*results.Result
}

// NewLogHook returns a pointer to an initialized LogCountHook.
func NewLogHook() *LogHook {
return &LogHook{
func NewLogHook() *LogCountHook {
return &LogCountHook{
counts: make(map[logrus.Level]int),
entries: make(map[logrus.Level]*results.Result),
}
}

// Levels returns the logrus levels that the hook should be fired for.
func (h *LogHook) Levels() []logrus.Level {
func (h *LogCountHook) Levels() []logrus.Level {
return logrus.AllLevels
}

// Fire executes the hook's logic.
func (h *LogHook) Fire(entry *logrus.Entry) error {
func (h *LogCountHook) Fire(entry *logrus.Entry) error {
h.mu.Lock()
defer h.mu.Unlock()

Expand Down Expand Up @@ -93,7 +93,7 @@ func (h *LogHook) Fire(entry *logrus.Entry) error {

// GetCount returns the number of log statements that have been
// written at the specific level provided.
func (h *LogHook) GetCount(level logrus.Level) int {
func (h *LogCountHook) GetCount(level logrus.Level) int {
h.mu.RLock()
defer h.mu.RUnlock()

Expand All @@ -102,7 +102,7 @@ func (h *LogHook) GetCount(level logrus.Level) int {

// GetEntries returns the log statements that have been
// written at the specific level provided.
func (h *LogHook) GetEntries(level logrus.Level) results.Result {
func (h *LogCountHook) GetEntries(level logrus.Level) results.Result {
h.mu.RLock()
defer h.mu.RUnlock()
response, isPresent := h.entries[level]
Expand Down

0 comments on commit 5e4c2a6

Please sign in to comment.