From a604eb6c69bbdb714368e39bcc29438661b2d863 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 16 Jul 2024 23:15:27 +1000 Subject: [PATCH] tests: fix race when pausing rename swap The pauseCh was only synchronised one way, which resulted in a race window here the test thread (after requesting the pause) would get the real path before the rename thread swapped the file back. This is easily fixed by doing the swap twice each loop iteration so that we only receive pause requests when we are in an okay-to-be-paused state. Removing the retry logic lets us do far more test runs for the racing tests, removing the need for the -short suite. The MkdirAll tests are still a bit slow, but 2k runs should be fine. Signed-off-by: Aleksa Sarai --- .github/workflows/ci.yml | 23 ++++++----------------- lookup_linux_test.go | 36 ++++-------------------------------- mkdir_linux_test.go | 18 ++++++++---------- util_linux_test.go | 16 +++++----------- 4 files changed, 23 insertions(+), 70 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 770f6e8..7b24d9f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,7 +49,7 @@ jobs: name: coverage-${{ runner.os }}-${{ github.job }}-${{ strategy.job-index }} path: ${{ env.GOCOVERDIR }} - test-unix-short: + test-unix: strategy: fail-fast: false matrix: @@ -74,32 +74,21 @@ jobs: run: | GOCOVERDIR="$(mktemp --tmpdir -d gocoverdir.XXXXXXXX)" echo "GOCOVERDIR=$GOCOVERDIR" >>"$GITHUB_ENV" - - name: unit tests - run: | - go test -short -v -cover -test.gocoverdir="$GOCOVERDIR" ./... - sudo go test -short -v -cover -test.gocoverdir="$GOCOVERDIR" ./... + - name: go test + run: go test -v -cover -test.gocoverdir="$GOCOVERDIR" ./... + - name: sudo go test + run: sudo go test -v -cover -test.gocoverdir="$GOCOVERDIR" ./... - name: upload coverage uses: actions/upload-artifact@v4 with: name: coverage-${{ runner.os }}-${{ github.job }}-${{ strategy.job-index }} path: ${{ env.GOCOVERDIR }} - test-linux-stress: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: - go-version: "^1" - - name: stress test race protections - run: | - sudo go test -timeout 1h -v -cover -run 'Test.*Racing' ./... - coverage: runs-on: ubuntu-latest needs: - test-windows - - test-unix-short + - test-unix steps: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 diff --git a/lookup_linux_test.go b/lookup_linux_test.go index f976681..f755366 100644 --- a/lookup_linux_test.go +++ b/lookup_linux_test.go @@ -311,7 +311,7 @@ func TestPartialLookupInRoot_BadInode(t *testing.T) { type racingLookupMeta struct { pauseCh chan struct{} passOkCount, passErrCount, skipCount, failCount, badErrCount int // test state counts - badNameCount, fixRemainingPathCount, unstableProcSelfFdCount int // workaround counts + badNameCount, fixRemainingPathCount int // workaround counts skipErrCounts map[error]int } @@ -351,32 +351,13 @@ func (m *racingLookupMeta) checkPartialLookup(t *testing.T, rootDir *os.File, un handleName string realPath string unixStat unix.Stat_t - realPaths []string ) if handle != nil { handleName = handle.Name() // Get the "proper" name from procSelfFdReadlink. m.pauseCh <- struct{}{} - // For some reason, it seems that (at a rate of 0.01% or so) even - // though we pause the rename thread is a nonsensical path from - // procSelfFdReadlink that looks like the path is still swapped. The - // key thing to note is that adding sleeps doesn't seem to help much, - // but retrying several times usually ends up with us getting the - // "right" result eventually. - // - // This seems like a kernel bug (or a weird issue with Go channels), - // but for now let's just retry a few times and keep track of how many - // transitions we saw. If we only see one change, then we can use the - // last one and just track the statistics. - const readlinkRetryMax = 500 realPath, err = procSelfFdReadlink(handle) - for i := 0; err == nil && i < readlinkRetryMax; i++ { - if last := len(realPaths) - 1; last < 0 || realPath != realPaths[last] { - realPaths = append(realPaths, realPath) - } - realPath, err = procSelfFdReadlink(handle) - } <-m.pauseCh require.NoError(t, err, "get real path of returned handle") @@ -386,12 +367,6 @@ func (m *racingLookupMeta) checkPartialLookup(t *testing.T, rootDir *os.File, un _ = handle.Close() } - assert.LessOrEqualf(t, len(realPaths), 2, "saw more than one transition in procSelfFdReadlink results: %v", realPaths) - if len(realPaths) > 1 { - m.unstableProcSelfFdCount++ - assert.Contains(t, realPaths, handleName, "at least one real path should match the handle name if we got more than one real path") - } - if realPath != handleName { // It's possible for handle.Name() to be wrong because while it was // correct when it was set, it might not match if the path was swapped @@ -539,10 +514,7 @@ func TestPartialLookup_RacingRename(t *testing.T) { go doRenameExchangeLoop(pauseCh, exitCh, rootDir, test.subPathA, test.subPathB) // Do several runs to try to catch bugs. - var testRuns = 10000 - if testing.Short() { - testRuns = 300 - } + const testRuns = 50000 m := newRacingLookupMeta(pauseCh) for i := 0; i < testRuns; i++ { m.checkPartialLookup(t, rootDir, test.unsafePath, test.skipErrs, test.allowedResults) @@ -558,9 +530,9 @@ func TestPartialLookup_RacingRename(t *testing.T) { testRuns, pct(m.passOkCount), pct(m.passErrCount), pct(m.skipCount), pct(m.failCount), // failures due to incorrect errors (rather than bad paths) pct(m.badErrCount)) - t.Logf(" badHandleName=%s fixRemainingPath=%s unstableProcSelfFdCount=%s", + t.Logf(" badHandleName=%s fixRemainingPath=%s", // stats for how many test runs had to have some "workarounds" - pct(m.badNameCount), pct(m.fixRemainingPathCount), pct(m.unstableProcSelfFdCount)) + pct(m.badNameCount), pct(m.fixRemainingPathCount)) if len(m.skipErrCounts) > 0 { t.Logf(" skipErr breakdown:") for err, count := range m.skipErrCounts { diff --git a/mkdir_linux_test.go b/mkdir_linux_test.go index 879e62e..bdad1c9 100644 --- a/mkdir_linux_test.go +++ b/mkdir_linux_test.go @@ -11,6 +11,7 @@ import ( "fmt" "os" "path/filepath" + "runtime" "testing" "github.com/stretchr/testify/assert" @@ -358,16 +359,15 @@ func TestMkdirAllHandle_RacingRename(t *testing.T) { }(rootCh) // Do several runs to try to catch bugs. - var testRuns = 10000 - if testing.Short() { - testRuns = 300 - } + const testRuns = 2000 m := newRacingMkdirMeta() for i := 0; i < testRuns; i++ { root := createTree(t, treeSpec...) - rootCh <- root + rootCh <- root + runtime.Gosched() // give the thread some time to do a rename m.checkMkdirAllHandle_Racing(t, root, test.unsafePath, 0o711, test.allowedErrs) + rootCh <- "" // Clean up the root after each run so we don't exhaust all // space in the tmpfs. @@ -430,16 +430,14 @@ func TestMkdirAllHandle_RacingDelete(t *testing.T) { }(rootCh) // Do several runs to try to catch bugs. - var testRuns = 10000 - if testing.Short() { - testRuns = 300 - } + const testRuns = 2000 m := newRacingMkdirMeta() for i := 0; i < testRuns; i++ { root := createTree(t, treeSpec...) - rootCh <- root + rootCh <- root m.checkMkdirAllHandle_Racing(t, root, test.unsafePath, 0o711, test.allowedErrs) + rootCh <- "" // Clean up the root after each run so we don't exhaust all // space in the tmpfs. diff --git a/util_linux_test.go b/util_linux_test.go index b3f94f5..5f4c1d5 100644 --- a/util_linux_test.go +++ b/util_linux_test.go @@ -89,21 +89,11 @@ func hasRenameExchange() bool { } func doRenameExchangeLoop(pauseCh chan struct{}, exitCh <-chan struct{}, dir *os.File, pathA, pathB string) { - var swapped bool - swap := func() { - _ = unix.Renameat2(int(dir.Fd()), pathA, int(dir.Fd()), pathB, unix.RENAME_EXCHANGE) - //unix.Sync() - swapped = !swapped - } - for { select { case <-exitCh: return case <-pauseCh: - if swapped { - swap() - } // Wait for caller to unpause us. select { case pauseCh <- struct{}{}: @@ -111,7 +101,11 @@ func doRenameExchangeLoop(pauseCh chan struct{}, exitCh <-chan struct{}, dir *os return } default: - swap() + // Do the swap twice so that we only pause when we are in a + // "correct" state. + for i := 0; i < 2; i++ { + _ = unix.Renameat2(int(dir.Fd()), pathA, int(dir.Fd()), pathB, unix.RENAME_EXCHANGE) + } } } }