Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tweak ETXTBSY retry, and be helpful for ENOENT #2736

Merged
merged 2 commits into from
Apr 22, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 46 additions & 8 deletions internal/job/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"slices"
"strconv"
"strings"
"syscall"
"time"

"github.com/buildkite/agent/v3/agent/plugin"
Expand Down Expand Up @@ -421,6 +422,31 @@ func logOpenedHookInfo(l shell.Logger, debug bool, hookName, path string) {
}
}

func logMissingHookInfo(l shell.Logger, wrapperPath string) {
// It's unlikely, but possible, that the script wrapper was spontaneously
// deleted or corrupted (it's usually in /tmp, which is fair game).
// A common setup error is to try to run a Bash hook in a container or other
// environment without Bash (or Bash is not in the expected location).
shebang, err := shellscript.ShebangLine(wrapperPath)
if err != nil {
// It's reasonable to assume the script wrapper was spontaneously
// deleted, or had something equally horrible happen to it.
l.Errorf("The %s hook failed to run - perhaps the wrapper script %q was spontaneously deleted", wrapperPath)
return
}
interpreter := strings.TrimPrefix(shebang, "#!")
if interpreter == "" {
// Either the script never had a shebang, or the script was
// spontaneously corrupted.
// If it didn't have a shebang line, we defaulted to using Bash, and if
// that's not present we already logged a warning.
// If it was spontaneously corrupted, we should expect a different error
// than ENOENT.
return
}
l.Errorf("The %s hook failed to run - perhaps the script interpreter %q is missing", interpreter)
}

func (e *Executor) runWrappedShellScriptHook(ctx context.Context, hookName string, hookCfg HookConfig) error {
defer e.redactors.Flush()

Expand Down Expand Up @@ -449,16 +475,20 @@ func (e *Executor) runWrappedShellScriptHook(ctx context.Context, hookName strin
e.shell.Promptf("%s", process.FormatCommand(cleanHookPath, []string{}))
}

const maxHookRetry = 3
const maxHookRetry = 30

// Run the wrapper script
if err := roko.NewRetrier(
roko.WithStrategy(roko.Constant(time.Second)),
roko.WithStrategy(roko.Constant(100*time.Millisecond)),
roko.WithMaxAttempts(maxHookRetry),
).DoWithContext(ctx, func(r *roko.Retrier) error {
// Run the script and only retry on fork/exec errors
// Run the script and only retry on ETXTBSY.
// This error occurs because of an unavoidable race between forking
// (which acquires open file descriptors of the parent process) and
// writing an executable (the script wrapper).
// See https://github.com/golang/go/issues/22315.
err := e.shell.RunScript(ctx, script.Path(), hookCfg.Env)
if perr := new(os.PathError); errors.As(err, &perr) && perr.Op == "fork/exec" {
if errors.Is(err, syscall.ETXTBSY) {
return err
}
r.Break()
Expand All @@ -476,10 +506,18 @@ func (e *Executor) runWrappedShellScriptHook(ctx context.Context, hookName strin
}
}

// If the error is from fork/exec, then inspect the file to see why it failed to be executed,
// even after the retry
if perr := new(os.PathError); errors.As(err, &perr) && perr.Op == "fork/exec" {
logOpenedHookInfo(e.shell.Logger, e.Debug, hookName, perr.Path)
switch {
case errors.Is(err, syscall.ETXTBSY):
// If the underlying error is _still_ ETXTBSY, then inspect the file
// to see what process had it open for write, to log something helpful
logOpenedHookInfo(e.shell.Logger, e.Debug, hookName, script.Path())

case errors.Is(err, syscall.ENOENT):
// Unfortunately the wrapping os.PathError's Path is always the
// program we tried to exec, even if the missing file/directory was
// actually the interpreter specified on the shebang line.
// Try to figure out which part is missing from the wrapper.
logMissingHookInfo(e.shell.Logger, script.Path())
}

return err
Expand Down