Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fastwalk: add optional sorting and improve documentation #27

Merged
merged 1 commit into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions adapters.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@ func isDir(path string, d fs.DirEntry) bool {
return false
}

// IgnoreDuplicateDirs wraps fs.WalkDirFunc walkFn to make it follow symbolic
// IgnoreDuplicateDirs wraps [fs.WalkDirFunc] walkFn to make it follow symbolic
// links and ignore duplicate directories (if a symlink points to a directory
// that has already been traversed it is skipped). The walkFn is called
// for skipped directories, but the directory is not traversed (this is
// required for error handling).
//
// The Config.Follow setting has no effect on the behavior of Walk when
// The Follow [Config] setting has no effect on the behavior of Walk when
// this wrapper is used.
//
// In most use cases, the returned fs.WalkDirFunc should not be reused between
// in another call to Walk. If it is reused, any previously visited file will
// be skipped.
// In most use cases, the returned [fs.WalkDirFunc] should not be reused.
// If it is reused, any previously visited file will be skipped.
//
// NOTE: The order of traversal is undefined. Given an "example" directory
// like the one below where "dir" is a directory and "smydir1" and "smydir2"
Expand Down Expand Up @@ -68,9 +67,8 @@ func IgnoreDuplicateDirs(walkFn fs.WalkDirFunc) fs.WalkDirFunc {
// files are ignored. If a symlink resolves to a file that has already been
// visited it will be skipped.
//
// In most use cases, the returned fs.WalkDirFunc should not be reused between
// in another call to Walk. If it is reused, any previously visited file will
// be skipped.
// In most use cases, the returned [fs.WalkDirFunc] should not be reused.
// If it is reused, any previously visited file will be skipped.
//
// This can significantly slow Walk as os.Stat() is called for each path
// (on Windows, os.Stat() is only needed for symlinks).
Expand All @@ -92,8 +90,8 @@ func IgnoreDuplicateFiles(walkFn fs.WalkDirFunc) fs.WalkDirFunc {
}
}

// IgnorePermissionErrors wraps walkFn so that permission errors are ignored.
// The returned fs.WalkDirFunc may be reused.
// IgnorePermissionErrors wraps walkFn so that [fs.ErrPermission] permission
// errors are ignored. The returned [fs.WalkDirFunc] may be reused.
func IgnorePermissionErrors(walkFn fs.WalkDirFunc) fs.WalkDirFunc {
return func(path string, d fs.DirEntry, err error) error {
if err != nil && os.IsPermission(err) {
Expand Down
15 changes: 10 additions & 5 deletions dirent.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,16 @@ func loadFileInfo(pinfo **fileInfo) *fileInfo {
return fi
}

// StatDirEntry returns the fs.FileInfo for the file or subdirectory described
// by the entry. If the entry is a symbolic link, StatDirEntry returns the
// fs.FileInfo for the file the line references (os.Stat).
// If fs.DirEntry de is a fastwalk.DirEntry it's Stat() method is used and the
// returned fs.FileInfo may be a previously cached result.
// StatDirEntry returns a [fs.FileInfo] describing the named file ([os.Stat]).
// If de is a [fastwalk.DirEntry] its Stat method is used and the returned
// FileInfo may be cached from a prior call to Stat. If a cached result is not
// desired, users should just call [os.Stat] directly.
//
// This is a helper function for calling Stat on the DirEntry passed to the
// walkFn argument to [Walk].
//
// The path argument is only used if de is not of type [fastwalk.DirEntry].
// Therefore, de should be the DirEntry describing path.
func StatDirEntry(path string, de fs.DirEntry) (fs.FileInfo, error) {
if de == nil {
return nil, &os.PathError{Op: "stat", Path: path, Err: syscall.EINVAL}
Expand Down
100 changes: 93 additions & 7 deletions dirent_portable.go
Original file line number Diff line number Diff line change
@@ -1,37 +1,123 @@
//go:build !darwin && !(aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris)

// TODO: add a "portable_dirent" build tag so that we can test this
// on non-Windows platforms

package fastwalk

import (
"io/fs"
"os"
"slices"
"strings"
"sync"
)

// Compile-time check that *portableDirent satisfies the DirEntry interface.
var _ DirEntry = (*portableDirent)(nil)

// portableDirent is the DirEntry implementation compiled on the platforms
// selected by this file's build constraint. It embeds an fs.DirEntry and
// carries the extra state that Stat needs to resolve symbolic links.
//
// NOTE(review): this listing is a rendered diff without +/- markers, so the
// field list appears to show both sides of the change: "path" and the first
// "stat" look like the removed lines, "parent" and the second "stat" like the
// added ones. Confirm against the repository before relying on it.
type portableDirent struct {
fs.DirEntry
// presumably the full path of the entry (read by os.Stat(d.path) in Stat)
path string
stat *fileInfo
// directory the entry was read from; set from dirName in newDirEntry
parent string
// holder for the os.Stat result and error that Stat fills in for symlinks
stat *fileInfo
}

// String implements fmt.Stringer by rendering the entry with
// fs.FormatDirEntry.
func (d *portableDirent) String() string {
	formatted := fs.FormatDirEntry(d)
	return formatted
}

// Stat returns the fs.FileInfo for the entry. When the entry's type does not
// have the os.ModeSymlink bit set, the embedded DirEntry's Info method is
// used. For symbolic links os.Stat is called on the entry's path inside
// stat.once.Do, and the stored FileInfo and error are returned.
//
// TODO: cache the result of Stat
//
// NOTE(review): stat.once is presumably a sync.Once (its declaration is not
// visible here), which would make the os.Stat result cached after the first
// call and the TODO above stale. The two adjacent os.Stat lines appear to be
// the removed (d.path) and added (d.parent + separator + d.Name()) sides of
// the rendered diff. Confirm against the repository.
func (d *portableDirent) Stat() (fs.FileInfo, error) {
// Not a symlink: no link to follow, defer to the embedded DirEntry.
if d.DirEntry.Type()&os.ModeSymlink == 0 {
return d.DirEntry.Info()
}
// loadFileInfo returns the *fileInfo that holds the result for this entry.
stat := loadFileInfo(&d.stat)
stat.once.Do(func() {
stat.FileInfo, stat.err = os.Stat(d.path)
stat.FileInfo, stat.err = os.Stat(d.parent + string(os.PathSeparator) + d.Name())
})
return stat.FileInfo, stat.err
}

// newDirEntry wraps info in a *portableDirent associated with the directory
// dirName that contains it.
//
// NOTE(review): the duplicated signature and the "path"/"parent" field lines
// appear to be the removed and added sides of the rendered diff (the return
// type changing from fs.DirEntry to DirEntry, and the stored full path being
// replaced by the parent directory). Confirm against the repository.
func newDirEntry(dirName string, info fs.DirEntry) fs.DirEntry {
func newDirEntry(dirName string, info fs.DirEntry) DirEntry {
return &portableDirent{
DirEntry: info,
path: dirName + string(os.PathSeparator) + info.Name(),
parent: dirName,
}
}

// fileInfoToDirEntry converts fi with fs.FileInfoToDirEntry and wraps the
// result with newDirEntry, using dirname as the containing directory.
//
// NOTE(review): the two signature lines appear to be the removed
// (fs.DirEntry) and added (DirEntry) sides of the rendered diff. Confirm
// against the repository.
func fileInfoToDirEntry(dirname string, fi fs.FileInfo) fs.DirEntry {
func fileInfoToDirEntry(dirname string, fi fs.FileInfo) DirEntry {
return newDirEntry(dirname, fs.FileInfoToDirEntry(fi))
}

// direntSlicePool recycles []DirEntry buffers. When the pool is empty, New
// hands out a pointer to a zero-length slice with room for 32 entries.
var direntSlicePool = sync.Pool{
	New: func() any {
		buf := new([]DirEntry)
		*buf = make([]DirEntry, 0, 32)
		return buf
	},
}

// putDirentSlice hands the slice behind p back to direntSlicePool. A nil p is
// ignored, and slices whose capacity exceeds 16*1024 entries are not pooled.
// Before pooling, every element is set to nil and the slice is truncated to
// length zero so the pooled buffer does not keep old entries reachable.
func putDirentSlice(p *[]DirEntry) {
	if p == nil {
		return
	}
	// max is half as many as Unix because twice the size
	// (presumably a DirEntry interface value takes two words where the Unix
	// implementation pools one-word elements — confirm against dirent_unix.go)
	const maxPooledCap = 16 * 1024
	if cap(*p) > maxPooledCap {
		return
	}
	clear(*p)
	*p = (*p)[:0]
	direntSlicePool.Put(p)
}

// sortDirents orders dents in place according to mode:
//
//   - SortLexical: by name only.
//   - SortFilesFirst: regular files, then other non-directory entries
//     (symlinks, sockets, ...), then directories; by name within each group.
//   - SortDirsFirst: directories, then regular files, then all remaining
//     entries; by name within each group.
//
// Any other mode leaves dents untouched, as do slices with fewer than two
// entries.
func sortDirents(mode SortMode, dents []DirEntry) {
	if len(dents) < 2 {
		return
	}

	byName := func(a, b DirEntry) int {
		return strings.Compare(a.Name(), b.Name())
	}

	// rank maps an entry type to the position of its group for the selected
	// mode; a lower rank sorts earlier.
	var rank func(typ fs.FileMode) int
	switch mode {
	case SortLexical:
		// Name order only: the entry type is never consulted.
		slices.SortFunc(dents, byName)
		return
	case SortFilesFirst:
		rank = func(typ fs.FileMode) int {
			switch {
			case typ.IsRegular():
				return 0
			case typ.IsDir():
				return 2
			default:
				return 1
			}
		}
	case SortDirsFirst:
		rank = func(typ fs.FileMode) int {
			switch {
			case typ.IsDir():
				return 0
			case typ.IsRegular():
				return 1
			default:
				return 2
			}
		}
	default:
		return
	}

	slices.SortFunc(dents, func(a, b DirEntry) int {
		ra, rb := rank(a.Type()), rank(b.Type())
		switch {
		case ra < rb:
			return -1
		case ra > rb:
			return 1
		}
		// Same group: fall back to name order.
		return byName(a, b)
	})
}
131 changes: 131 additions & 0 deletions dirent_portable_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
//go:build !darwin && !(aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris)

package fastwalk

import (
"io/fs"
"math/rand"
"reflect"
"testing"
"time"
)

// Compile-time check that the dirEntry test stub satisfies DirEntry.
var _ DirEntry = dirEntry{}

// dirEntry is a minimal DirEntry used by the tests. It carries only a name
// and a file mode; Info and Stat are deliberately unimplemented.
type dirEntry struct {
	name string
	typ  fs.FileMode
}

// Name returns the entry's name.
func (e dirEntry) Name() string {
	return e.name
}

// IsDir reports whether the stored mode describes a directory.
func (e dirEntry) IsDir() bool {
	return e.typ.IsDir()
}

// Type returns only the type bits of the stored mode.
func (e dirEntry) Type() fs.FileMode {
	return e.typ.Type()
}

// Info is not needed by the tests and panics when called.
func (e dirEntry) Info() (fs.FileInfo, error) {
	panic("not implemented")
}

// Stat is not needed by the tests and panics when called.
func (e dirEntry) Stat() (fs.FileInfo, error) {
	panic("not implemented")
}

// String renders the entry with fs.FormatDirEntry.
func (e dirEntry) String() string {
	return fs.FormatDirEntry(e)
}

// TestSortDirents verifies that sortDirents leaves the slice order unchanged
// for SortNone and produces the expected order for SortLexical,
// SortFilesFirst and SortDirsFirst, whatever order the entries start in.
//
// NB: this must be kept in sync with the
// TestSortDirents in dirent_unix_test.go
func TestSortDirents(t *testing.T) {
	// entries builds one dirEntry of type typ per name, in the given order.
	entries := func(typ fs.FileMode, names ...string) []DirEntry {
		ents := make([]DirEntry, 0, len(names))
		for _, name := range names {
			ents = append(ents, dirEntry{name: name, typ: typ})
		}
		return ents
	}

	// concat joins the groups into one freshly allocated slice.
	concat := func(groups ...[]DirEntry) []DirEntry {
		var all []DirEntry
		for _, g := range groups {
			all = append(all, g...)
		}
		return all
	}

	// namesOf lists the entry names in slice order.
	namesOf := func(ents []DirEntry) []string {
		out := make([]string, 0, len(ents))
		for _, e := range ents {
			out = append(out, e.Name())
		}
		return out
	}

	t.Run("None", func(t *testing.T) {
		ents := entries(0, "b", "a", "d", "c")
		want := namesOf(ents)
		sortDirents(SortNone, ents)
		if got := namesOf(ents); !reflect.DeepEqual(got, want) {
			t.Errorf("got: %q want: %q", got, want)
		}
	})

	rng := rand.New(rand.NewSource(time.Now().UnixNano()))

	// expectOrder shuffles ordered, sorts it with mode and checks that the
	// original order is restored. ordered must be passed in already in the
	// expected order.
	expectOrder := func(t *testing.T, mode SortMode, ordered []DirEntry) {
		want := namesOf(ordered)
		// Run multiple times with different shuffles
		for round := 0; round < 10; round++ {
			t.Run("", func(t *testing.T) {
				rng.Shuffle(len(ordered), func(i, j int) {
					ordered[i], ordered[j] = ordered[j], ordered[i]
				})
				sortDirents(mode, ordered)
				if got := namesOf(ordered); !reflect.DeepEqual(got, want) {
					t.Errorf("got: %q want: %q", got, want)
				}
			})
		}
	}

	t.Run("Lexical", func(t *testing.T) {
		expectOrder(t, SortLexical, entries(0, "a", "b", "c", "d"))
	})

	t.Run("FilesFirst", func(t *testing.T) {
		expectOrder(t, SortFilesFirst, concat(
			// Files lexically
			entries(0, "f1", "f2", "f3"),
			// Non-dirs lexically
			entries(fs.ModeSymlink, "a1", "a2", "a3"),
			entries(fs.ModeSocket, "s1", "s2", "s3"),
			// Dirs lexically
			entries(fs.ModeDir, "d1", "d2", "d3"),
		))
	})

	t.Run("DirsFirst", func(t *testing.T) {
		expectOrder(t, SortDirsFirst, concat(
			// Dirs lexically
			entries(fs.ModeDir, "d1", "d2", "d3"),
			// Files lexically
			entries(0, "f1", "f2", "f3"),
			// Non-dirs lexically
			entries(fs.ModeSymlink, "a1", "a2", "a3"),
			entries(fs.ModeSocket, "s1", "s2", "s3"),
		))
	})
}
Loading
Loading