Skip to content

Commit

Permalink
cmd/watchflakes: report consistent failures at top
Browse files Browse the repository at this point in the history
Currently, watchflakes skips consistent failures, including ones
at the top, i.e. failing at the latest few commits. Consistently
failing at tip means builds are broken recently on this builder.
Currently we rely on humans watching the dashboard to report such
failures. This CL lets watchflakes do so. If a new issue is
opened for such failures, the title includes "[consistent failure]"
to increase awareness.

Fixes golang/go#58819.

Change-Id: I2ea89a5d8edee0b9087f03b58e4ff834c03e826d
Reviewed-on: https://go-review.googlesource.com/c/build/+/601439
Reviewed-by: Carlos Amedee <carlos@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
  • Loading branch information
cherrymui committed Aug 2, 2024
1 parent c640e78 commit 1cfd0d1
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 4 deletions.
1 change: 1 addition & 0 deletions cmd/watchflakes/luci.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ type BuildResult struct {
StepLogURL string // textual log of the (last) failed step, if any
StepLogText string
Failures []*Failure
Top bool // whether this is a consistent failure at the top (tip)
}

type Commit struct {
Expand Down
40 changes: 36 additions & 4 deletions cmd/watchflakes/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ const timeLimit = 45 * 24 * time.Hour

const maxFailPerBuild = 3

const tooManyToBeFlakes = 4

var (
build = flag.String("build", "", "a particular build ID or URL to analyze (mainly for debugging)")
md = flag.Bool("md", false, "print Markdown output suitable for GitHub issues")
Expand Down Expand Up @@ -228,6 +230,20 @@ Repeat:
}
}
}
for _, issue := range issues {
if issue.Number == 0 && len(issue.Post) >= tooManyToBeFlakes && issue.Post[0].Top {
// New issue. Check if it is failing consistently at top.
top := 0
for _, fp := range issue.Post {
if fp.Top {
top++
}
}
if top >= tooManyToBeFlakes {
issue.Title += " [consistent failure]"
}
}
}

if query != nil {
format := (*FailurePost).Text
Expand Down Expand Up @@ -324,13 +340,15 @@ func skipBrokenCommits(boards []*Dashboard) {
// skipBrokenBuilders identifies builders that were consistently broken
// (at least tooManyToBeFlakes failures in a row) and then turned ok.
// It changes those consistent failures to SKIP.
//
// It does not skip consistent failures at the top (latest few commits).
// Instead, it sets Top to true on them.
func skipBrokenBuilders(boards []*Dashboard) {
const tooManyToBeFlakes = 4

for _, dash := range boards {
for _, rs := range dash.Results {
bad := 100 // squash failures at the top of the dashboard, which may turn out to be consistent
bad := 0
badStart := 0
top := true
skip := func(i int) { // skip the i-th result
if rs[i] != nil {
fmt.Printf("skip: builder %s was broken at %s (%s %s)\n", rs[i].Builder, shortHash(rs[i].Commit), dash.Repo, dash.GoBranch)
Expand All @@ -343,10 +361,24 @@ func skipBrokenBuilders(boards []*Dashboard) {
}
switch r.Status {
case bbpb.Status_SUCCESS:
if top && bad < tooManyToBeFlakes {
// Skip the run at the top.
// Too few to tell if it is flaky or consistent.
// It may also get fixed soon.
for j := 0; j < i; j++ {
skip(j)
}
}
top = false
bad = 0
continue
case bbpb.Status_FAILURE:
bad++
if top {
// Set Top to true, but don't skip.
r.Top = true
continue
}
default: // ignore other status
continue
}
Expand All @@ -368,7 +400,7 @@ func skipBrokenBuilders(boards []*Dashboard) {
// even if there are just a few of them. Otherwise we get
// spurious flakes when there's one bad entry before the
// cutoff and lots after the cutoff.
if bad > 0 && badStart > 0 {
if bad > 0 {
for j := badStart; j < len(rs); j++ {
skip(j)
}
Expand Down

0 comments on commit 1cfd0d1

Please sign in to comment.