Skip to content

Commit

Permalink
Merge pull request #17662 from siyuanfoundation/flaky
Browse files Browse the repository at this point in the history
Add script and workflow to detect flaky tests in testgrid.
  • Loading branch information
ahrtr authored Apr 5, 2024
2 parents 9cb33de + bf44390 commit 0168c3e
Show file tree
Hide file tree
Showing 9 changed files with 2,414 additions and 0 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/measure-testgrid-flakiness.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
# Weekly job that analyzes TestGrid results and files GitHub issues for
# flaky tests via scripts/measure-testgrid-flakiness.sh.
name: Measure TestGrid Flakiness

on:
  schedule:
    - cron: "0 0 * * 0" # run every Sunday at midnight

permissions: read-all

jobs:
  measure-testgrid-flakiness:
    name: Measure TestGrid Flakiness
    runs-on: ubuntu-latest
    # The script creates issues with GITHUB_TOKEN, which the read-only
    # workflow default does not allow. Job-level permissions replace the
    # workflow-level ones, so contents: read is restated for the checkout.
    permissions:
      contents: read
      issues: write
    steps:
      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      - id: goversion
        run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
      - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
        with:
          go-version: ${{ steps.goversion.outputs.goversion }}
      - env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail
          ./scripts/measure-testgrid-flakiness.sh
15 changes: 15 additions & 0 deletions scripts/measure-testgrid-flakiness.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Measures test flakiness on TestGrid and creates issues for flaky tests.

set -euo pipefail

# The analysis tool reads GITHUB_TOKEN to talk to the GitHub API.
if [[ -z "${GITHUB_TOKEN:-}" ]]; then
  echo "Please set the \$GITHUB_TOKEN environment variable for the script to work"
  exit 1
fi

# Tabs of the sig-etcd-periodics dashboard to analyze, in this order.
tabs=(
  ci-etcd-e2e-amd64
  ci-etcd-unit-test-amd64
)

pushd ./tools/testgrid-analysis
for tab in "${tabs[@]}"; do
  go run main.go flaky --create-issue --dashboard=sig-etcd-periodics "--tab=${tab}"
done
popd
146 changes: 146 additions & 0 deletions tools/testgrid-analysis/cmd/data.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
// Copyright 2024 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"fmt"
"io"
"net/http"
"os"
"strings"

apipb "github.com/GoogleCloudPlatform/testgrid/pb/api/v1"
statuspb "github.com/GoogleCloudPlatform/testgrid/pb/test_status"
"google.golang.org/protobuf/encoding/protojson"
)

var (
	// validTestStatuses lists the cell results that count as a test run.
	validTestStatuses = []statuspb.TestStatus{
		statuspb.TestStatus_PASS,
		statuspb.TestStatus_FAIL,
		statuspb.TestStatus_FLAKY,
	}
	// failureTestStatuses lists the cell results that count as a failed run.
	failureTestStatuses = []statuspb.TestStatus{
		statuspb.TestStatus_FAIL,
		statuspb.TestStatus_FLAKY,
	}

	// Set forms of the lists above, keyed by the raw int32 result code so
	// they can be looked up directly with a cell's Result value.
	validTestStatusesInt   = intStatusSet(validTestStatuses)
	failureTestStatusesInt = intStatusSet(failureTestStatuses)

	// skippedTestStatuses collects the non-zero result codes that processRow
	// ignored because they are not in validTestStatusesInt.
	skippedTestStatuses = map[int32]struct{}{}
)

// TestResultSummary aggregates the results of a single test row of a
// TestGrid tab.
type TestResultSummary struct {
	// Name is the shortened test name (see shortenTestName).
	Name string
	// FullName is the row name exactly as reported by TestGrid.
	FullName string
	// TotalRuns is the number of cells with a valid (counted) status.
	TotalRuns int
	// FailedRuns is the number of counted cells with a failure status.
	FailedRuns int
	// FailureRate is FailedRuns divided by TotalRuns.
	FailureRate float32
	// FailureLogs holds one markdown table row per failed run.
	FailureLogs []string
	// IssueBody is the markdown report used as the GitHub issue body; it is
	// only populated when at least one run failed.
	IssueBody string
}

// fetchTestResultSummaries downloads the rows and headers of the given
// TestGrid dashboard tab and returns one TestResultSummary per row, in the
// order the rows were returned.
//
// A response that cannot be decoded is reported and terminates the process
// with exit status 1, the same way fetchJSON handles a failed download;
// continuing would silently analyze empty or truncated data.
func fetchTestResultSummaries(dashboard, tab string) []*TestResultSummary {
	// Fetch test data
	rowsURL := fmt.Sprintf("http://testgrid-data.k8s.io/api/v1/dashboards/%s/tabs/%s/rows", dashboard, tab)
	headersURL := fmt.Sprintf("http://testgrid-data.k8s.io/api/v1/dashboards/%s/tabs/%s/headers", dashboard, tab)

	// Tolerate fields added by a newer TestGrid server than the vendored
	// protobuf definitions, but fail on anything that is genuinely malformed.
	decoder := protojson.UnmarshalOptions{DiscardUnknown: true}

	var testData apipb.ListRowsResponse
	if err := decoder.Unmarshal(fetchJSON(rowsURL), &testData); err != nil {
		fmt.Println("Error parsing test data:", err)
		os.Exit(1)
	}
	var headerData apipb.ListHeadersResponse
	if err := decoder.Unmarshal(fetchJSON(headersURL), &headerData); err != nil {
		fmt.Println("Error parsing test headers:", err)
		os.Exit(1)
	}

	// processRow needs every row name to recognize parent (rollup) tests.
	allTests := make([]string, 0, len(testData.Rows))
	for _, row := range testData.Rows {
		allTests = append(allTests, row.Name)
	}

	// Process rows
	summaries := make([]*TestResultSummary, 0, len(testData.Rows))
	for _, row := range testData.Rows {
		summaries = append(summaries, processRow(dashboard, tab, row, allTests, headerData.Headers))
	}
	return summaries
}

// processRow summarizes one TestGrid row (one test) into a TestResultSummary.
//
// Rows that are parents of other rows, or whose name does not start with
// "go.etcd.io", are returned with only Name and FullName set. For every other
// row, cells whose result is in validTestStatusesInt are counted as runs and
// those also in failureTestStatusesInt as failures; any other non-zero result
// code is recorded in skippedTestStatuses. When at least one run failed,
// IssueBody is filled with a markdown report listing the failing runs.
//
// headers is indexed by cell position to describe each failing run.
func processRow(dashboard, tab string, row *apipb.ListRowsResponse_Row, allTests []string, headers []*apipb.ListHeadersResponse_Header) *TestResultSummary {
	t := TestResultSummary{Name: shortenTestName(row.Name), FullName: row.Name}
	// we do not want to create issues for a parent test.
	if isParentTest(row.Name, allTests) {
		return &t
	}
	if !strings.HasPrefix(row.Name, "go.etcd.io") {
		return &t
	}
	total := 0
	failed := 0
	logs := []string{}
	for i, cell := range row.Cells {
		// ignore tests with status not in the validTestStatuses
		// cell result codes are listed in https://github.com/GoogleCloudPlatform/testgrid/blob/main/pb/test_status/test_status.proto
		if _, ok := validTestStatusesInt[cell.Result]; !ok {
			if cell.Result != 0 {
				skippedTestStatuses[cell.Result] = struct{}{}
			}
			continue
		}
		total++
		if _, ok := failureTestStatusesInt[cell.Result]; !ok {
			continue
		}
		failed++
		// The failure is still counted when the headers response has no
		// entry for this column; only its log row is omitted, instead of
		// panicking on an out-of-range index.
		if i >= len(headers) || headers[i] == nil {
			continue
		}
		header := headers[i]
		// markdown table format of | commit | started | log |
		logs = append(logs, fmt.Sprintf("| %s | %s | https://prow.k8s.io/view/gs/kubernetes-jenkins/logs/%s/%s |", strings.Join(header.Extra, ","), header.Started.AsTime().String(), tab, header.Build))
	}
	t.FailedRuns = failed
	t.TotalRuns = total
	t.FailureLogs = logs
	// A row without any counted run keeps a zero failure rate rather than
	// the NaN that 0/0 would produce.
	if total > 0 {
		t.FailureRate = float32(failed) / float32(total)
	}
	if t.FailedRuns > 0 {
		dashboardURL := fmt.Sprintf("[%s](https://testgrid.k8s.io/%s#%s)", tab, dashboard, tab)
		t.IssueBody = fmt.Sprintf("## %s Test: %s \nTest failed %.1f%% (%d/%d) of the time\n\nfailure logs are:\n| commit | started | log |\n| --- | --- | --- |\n%s\n",
			dashboardURL, t.FullName, t.FailureRate*100, t.FailedRuns, t.TotalRuns, strings.Join(t.FailureLogs, "\n"))
		t.IssueBody += "\nPlease follow the [instructions in the contributing guide](https://github.com/etcd-io/etcd/blob/main/CONTRIBUTING.md#check-for-flaky-tests) to reproduce the issue.\n"
	}
	return &t
}

// isParentTest checks if a test is a rollup of some child tests.
func isParentTest(test string, allTests []string) bool {
for _, t := range allTests {
if t != test && strings.HasPrefix(t, test+"/") {
return true
}
}
return false
}

// fetchJSON performs an HTTP GET on url and returns the raw response body.
//
// Any failure (request error, non-200 status, or an error while reading the
// body) is printed and terminates the process with exit status 1, so callers
// never receive an error page or a truncated payload.
func fetchJSON(url string) []byte {
	resp, err := http.Get(url)
	if err != nil {
		fmt.Println("Error fetching test data:", err)
		os.Exit(1)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		fmt.Printf("Error fetching test data: GET %s returned status %s\n", url, resp.Status)
		os.Exit(1)
	}
	testBody, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("Error reading test data:", err)
		os.Exit(1)
	}
	return testBody
}

// intStatusSet converts a list of statuspb.TestStatus into a set keyed by the
// int32 value of each status.
func intStatusSet(statuses []statuspb.TestStatus) map[int32]struct{} {
	set := make(map[int32]struct{}, len(statuses))
	for i := range statuses {
		code := int32(statuses[i])
		set[code] = struct{}{}
	}
	return set
}

// shortenTestName returns the part of fullname that follows its last ".",
// or fullname unchanged when it contains no ".".
func shortenTestName(fullname string) string {
	lastDot := strings.LastIndex(fullname, ".")
	if lastDot < 0 {
		return fullname
	}
	return fullname[lastDot+1:]
}
75 changes: 75 additions & 0 deletions tools/testgrid-analysis/cmd/flaky.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright 2024 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"fmt"

"github.com/spf13/cobra"
)

// flakyCmd is the "flaky" subcommand: it reports the flaky tests of a
// dashboard tab and, when requested, files GitHub issues for them.
var flakyCmd = &cobra.Command{
	Use:   "flaky",
	Short: "detect flaky tests",
	Long:  `detect flaky tests within the dashboard#tab, and create GitHub issues if desired.`,
	Run:   flakyFunc,
}

// Values of the flaky command's flags; they are bound to the flags in init.
var (
	// flakyThreshold is the minimum failure fraction for a test to be flaky.
	flakyThreshold float32
	// minRuns is the minimum number of runs for a test to be analyzed.
	minRuns int
	// createGithubIssue enables filing a GitHub issue per flaky test.
	createGithubIssue bool
	// githubOwner and githubRepo select the repository issues are filed in.
	githubOwner string
	githubRepo  string
)

// lineSep is the horizontal rule printed around sections of the report.
var lineSep = "-------------------------------------------------------------"

// init registers the flaky command under the root command and declares its
// flags together with their default values.
func init() {
	rootCmd.AddCommand(flakyCmd)

	flags := flakyCmd.Flags()
	flags.BoolVar(&createGithubIssue, "create-issue", false, "create Github issue for each flaky test")
	flags.Float32Var(&flakyThreshold, "flaky-threshold", 0.1, "fraction threshold of test failures for a test to be considered flaky")
	flags.IntVar(&minRuns, "min-runs", 20, "minimum test runs for a test to be included in flaky analysis")
	flags.StringVar(&githubOwner, "github-owner", "etcd-io", "the github organization to create the issue for")
	flags.StringVar(&githubRepo, "github-repo", "etcd", "the github repo to create the issue for")
}

// flakyFunc is the Run handler of the flaky command. It fetches the test
// summaries of the selected dashboard tab, keeps the tests that have at least
// minRuns runs and a failure rate of at least flakyThreshold, prints a report
// for each of them and, when createGithubIssue is set, files GitHub issues
// labeled "type/flake".
func flakyFunc(cmd *cobra.Command, args []string) {
	fmt.Printf("flaky called, for %s#%s, createGithubIssue=%v, githubRepo=%s/%s, flakyThreshold=%f, minRuns=%d\n", dashboard, tab, createGithubIssue, githubOwner, githubRepo, flakyThreshold, minRuns)

	flakyTests := []*TestResultSummary{}
	for _, summary := range fetchTestResultSummaries(dashboard, tab) {
		enoughRuns := summary.TotalRuns >= minRuns
		// Kept as a ">=" comparison so a NaN failure rate never qualifies.
		tooFlaky := summary.FailureRate >= flakyThreshold
		if enoughRuns && tooFlaky {
			flakyTests = append(flakyTests, summary)
		}
	}

	fmt.Println(lineSep)
	fmt.Printf("Detected total %d flaky tests for %s#%s\n", len(flakyTests), dashboard, tab)
	fmt.Println(lineSep)
	if len(flakyTests) == 0 {
		return
	}

	for i := range flakyTests {
		fmt.Println(lineSep)
		fmt.Println(flakyTests[i].IssueBody)
		fmt.Println(lineSep)
	}

	if !createGithubIssue {
		return
	}
	createIssues(flakyTests, []string{"type/flake"})
}
78 changes: 78 additions & 0 deletions tools/testgrid-analysis/cmd/github.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright 2024 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"context"
"fmt"
"os"
"strings"

"github.com/google/go-github/v60/github"
)

// createIssues files a GitHub issue for every test in tests that does not
// already have one open.
//
// Open issues are looked up by labels; new issues carry labels plus
// "help wanted". Tests sharing the same short Name are handled once per call,
// because the open-issue list is fetched only once and would not contain an
// issue created earlier in the same loop.
func createIssues(tests []*TestResultSummary, labels []string) {
	openIssues := getOpenIssues(labels)

	// Build the label set once, in a fresh slice, so that appending never
	// writes into spare capacity of the caller's slice.
	issueLabels := make([]string, 0, len(labels)+1)
	issueLabels = append(issueLabels, labels...)
	issueLabels = append(issueLabels, "help wanted")

	handled := make(map[string]struct{}, len(tests))
	for _, t := range tests {
		if _, dup := handled[t.Name]; dup {
			fmt.Printf("Test %s was already handled in this run, skipping\n\n", t.Name)
			continue
		}
		handled[t.Name] = struct{}{}
		createIssueIfNonExist(tab, t, openIssues, issueLabels)
	}
}

// getOpenIssues returns all issues of githubOwner/githubRepo that carry the
// given labels, following pagination until the last page. It authenticates
// with the GITHUB_TOKEN environment variable and panics if a request fails.
//
// NOTE(review): no State filter is set, so this relies on the API's default
// of returning open issues only — confirm against the go-github docs.
func getOpenIssues(labels []string) []*github.Issue {
	ctx := context.Background()
	client := github.NewClient(nil).WithAuthToken(os.Getenv("GITHUB_TOKEN"))

	opts := &github.IssueListByRepoOptions{
		Labels:      labels,
		ListOptions: github.ListOptions{PerPage: 100},
	}

	allIssues := make([]*github.Issue, 0)
	for morePages := true; morePages; {
		page, resp, err := client.Issues.ListByRepo(ctx, githubOwner, githubRepo, opts)
		if err != nil {
			panic(err)
		}
		allIssues = append(allIssues, page...)
		// NextPage is 0 once the last page has been returned.
		opts.Page = resp.NextPage
		morePages = resp.NextPage != 0
	}

	fmt.Printf("There are %d issues open with label %v\n", len(allIssues), labels)
	return allIssues
}

// createIssueIfNonExist opens a "Flaky test <name>" issue for t in
// githubOwner/githubRepo unless one of issues already mentions the test.
//
// An existing issue matches when its title contains t.Name as a substring, so
// an issue for a longer test name with the same prefix also counts as a match.
// The new issue uses t.IssueBody as body and labels as labels. The tab
// parameter is currently unused. Panics if the GitHub request fails.
func createIssueIfNonExist(tab string, t *TestResultSummary, issues []*github.Issue, labels []string) {
	// check if there is already an open issue regarding this test
	for _, issue := range issues {
		// GetTitle is nil-safe, unlike dereferencing issue.Title directly.
		if strings.Contains(issue.GetTitle(), t.Name) {
			fmt.Printf("%s is already open for test %s\n\n", issue.GetHTMLURL(), t.Name)
			return
		}
	}
	fmt.Printf("Opening new issue for %s\n", t.Name)
	client := github.NewClient(nil).WithAuthToken(os.Getenv("GITHUB_TOKEN"))
	ctx := context.Background()
	req := &github.IssueRequest{
		Title:  github.String(fmt.Sprintf("Flaky test %s", t.Name)),
		Body:   &t.IssueBody,
		Labels: &labels,
	}
	issue, _, err := client.Issues.Create(ctx, githubOwner, githubRepo, req)
	if err != nil {
		panic(err)
	}
	fmt.Printf("New issue %s created for %s\n\n", issue.GetHTMLURL(), t.Name)
}
44 changes: 44 additions & 0 deletions tools/testgrid-analysis/cmd/root.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright 2024 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"os"

"github.com/spf13/cobra"
)

var (
	// dashboard and tab hold the values of the persistent --dashboard and
	// --tab flags declared in init; they are shared by all subcommands.
	dashboard string
	tab       string

	// rootCmd is the top-level testgrid-analysis command that subcommands
	// attach to.
	rootCmd = &cobra.Command{
		Use:   "testgrid-analysis",
		Short: "testgrid-analysis",
		Long:  `testgrid-analysis analyzes the testgrid test results of sig-etcd.`,
	}
)

// Execute runs the root command and terminates the process with exit status 1
// when the command returns an error.
func Execute() {
	if err := rootCmd.Execute(); err != nil {
		os.Exit(1)
	}
}

// init declares the persistent flags that select which TestGrid dashboard
// and tab are analyzed.
func init() {
	persistent := rootCmd.PersistentFlags()
	persistent.StringVar(&dashboard, "dashboard", "sig-etcd-periodics", "testgrid dashboard to retrieve data from")
	persistent.StringVar(&tab, "tab", "ci-etcd-e2e-amd64", "testgrid tab within the dashboard to retrieve data from")
}
Loading

0 comments on commit 0168c3e

Please sign in to comment.