Skip to content

Commit

Permalink
fastwalk: add optional sorting and improve documentation
Browse files Browse the repository at this point in the history
This commit adds the new SortMode and Config.Sort setting to sort
a directory's entries before they are processed. This does not make
the global order in which directories and entries are visited
deterministic, but it does help make the output a bit saner compared
to the default directory order. This was added to make the output of FZF
a bit nicer.

This commit also improves documentation and comments of exported
functions.
  • Loading branch information
charlievieth committed Jul 18, 2024
1 parent 875daa3 commit 18d517e
Show file tree
Hide file tree
Showing 14 changed files with 1,020 additions and 280 deletions.
18 changes: 8 additions & 10 deletions adapters.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@ func isDir(path string, d fs.DirEntry) bool {
return false
}

// IgnoreDuplicateDirs wraps fs.WalkDirFunc walkFn to make it follow symbolic
// IgnoreDuplicateDirs wraps [fs.WalkDirFunc] walkFn to make it follow symbolic
// links and ignore duplicate directories (if a symlink points to a directory
// that has already been traversed it is skipped). The walkFn is called
// for skipped directories, but the directory is not traversed (this is
// required for error handling).
//
// The Config.Follow setting has no effect on the behavior of Walk when
// The Follow [Config] setting has no effect on the behavior of Walk when
// this wrapper is used.
//
// In most use cases, the returned fs.WalkDirFunc should not be reused between
// in another call to Walk. If it is reused, any previously visited file will
// be skipped.
// In most use cases, the returned [fs.WalkDirFunc] should not be reused.
// If it is reused, any previously visited file will be skipped.
//
// NOTE: The order of traversal is undefined. Given an "example" directory
// like the one below where "dir" is a directory and "smydir1" and "smydir2"
Expand Down Expand Up @@ -68,9 +67,8 @@ func IgnoreDuplicateDirs(walkFn fs.WalkDirFunc) fs.WalkDirFunc {
// files are ignored. If a symlink resolves to a file that has already been
// visited it will be skipped.
//
// In most use cases, the returned fs.WalkDirFunc should not be reused between
// in another call to Walk. If it is reused, any previously visited file will
// be skipped.
// In most use cases, the returned [fs.WalkDirFunc] should not be reused.
// If it is reused, any previously visited file will be skipped.
//
// This can significantly slow Walk as os.Stat() is called for each path
// (on Windows, os.Stat() is only needed for symlinks).
Expand All @@ -92,8 +90,8 @@ func IgnoreDuplicateFiles(walkFn fs.WalkDirFunc) fs.WalkDirFunc {
}
}

// IgnorePermissionErrors wraps walkFn so that permission errors are ignored.
// The returned fs.WalkDirFunc may be reused.
// IgnorePermissionErrors wraps walkFn so that [fs.ErrPermission] permission
// errors are ignored. The returned [fs.WalkDirFunc] may be reused.
func IgnorePermissionErrors(walkFn fs.WalkDirFunc) fs.WalkDirFunc {
return func(path string, d fs.DirEntry, err error) error {
if err != nil && os.IsPermission(err) {
Expand Down
15 changes: 10 additions & 5 deletions dirent.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,16 @@ func loadFileInfo(pinfo **fileInfo) *fileInfo {
return fi
}

// StatDirEntry returns the fs.FileInfo for the file or subdirectory described
// by the entry. If the entry is a symbolic link, StatDirEntry returns the
// fs.FileInfo for the file the line references (os.Stat).
// If fs.DirEntry de is a fastwalk.DirEntry it's Stat() method is used and the
// returned fs.FileInfo may be a previously cached result.
// StatDirEntry returns a [fs.FileInfo] describing the named file ([os.Stat]).
// If de is a [fastwalk.DirEntry] its Stat method is used and the returned
// FileInfo may be cached from a prior call to Stat. If a cached result is not
// desired, users should just call [os.Stat] directly.
//
// This is a helper function for calling Stat on the DirEntry passed to the
// walkFn argument to [Walk].
//
// The path argument is only used if de is not of type [fastwalk.DirEntry].
// Therefore, de should be the DirEntry describing path.
func StatDirEntry(path string, de fs.DirEntry) (fs.FileInfo, error) {
if de == nil {
return nil, &os.PathError{Op: "stat", Path: path, Err: syscall.EINVAL}
Expand Down
100 changes: 93 additions & 7 deletions dirent_portable.go
Original file line number Diff line number Diff line change
@@ -1,37 +1,123 @@
//go:build !darwin && !(aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris)

// TODO: add a "portable_dirent" build tag so that we can test this
// on non-Windows platforms

package fastwalk

import (
"io/fs"
"os"
"slices"
"strings"
"sync"
)

var _ DirEntry = (*portableDirent)(nil)

type portableDirent struct {
fs.DirEntry
path string
stat *fileInfo
parent string
stat *fileInfo
}

// String returns the entry formatted by fs.FormatDirEntry.
func (d *portableDirent) String() string {
	formatted := fs.FormatDirEntry(d)
	return formatted
}

// TODO: cache the result of Stat
func (d *portableDirent) Stat() (fs.FileInfo, error) {
if d.DirEntry.Type()&os.ModeSymlink == 0 {
return d.DirEntry.Info()
}
stat := loadFileInfo(&d.stat)
stat.once.Do(func() {
stat.FileInfo, stat.err = os.Stat(d.path)
stat.FileInfo, stat.err = os.Stat(d.parent + string(os.PathSeparator) + d.Name())
})
return stat.FileInfo, stat.err
}

func newDirEntry(dirName string, info fs.DirEntry) fs.DirEntry {
func newDirEntry(dirName string, info fs.DirEntry) DirEntry {
return &portableDirent{
DirEntry: info,
path: dirName + string(os.PathSeparator) + info.Name(),
parent: dirName,
}
}

func fileInfoToDirEntry(dirname string, fi fs.FileInfo) fs.DirEntry {
func fileInfoToDirEntry(dirname string, fi fs.FileInfo) DirEntry {
return newDirEntry(dirname, fs.FileInfoToDirEntry(fi))
}

// direntSlicePool is a pool of *[]DirEntry scratch slices. A freshly
// created slice is empty and has room for 32 entries.
var direntSlicePool = sync.Pool{
	New: func() any {
		const initialCap = 32
		dents := make([]DirEntry, 0, initialCap)
		return &dents
	},
}

// putDirentSlice returns p to direntSlicePool after clearing its elements
// and truncating it to zero length. A nil p, or a slice whose capacity
// exceeds 16*1024, is dropped instead of being pooled.
func putDirentSlice(p *[]DirEntry) {
	if p == nil {
		return
	}
	// The capacity limit is half the one used on Unix because the
	// elements here are twice the size.
	if cap(*p) > 16*1024 {
		return
	}
	dents := *p
	// Nil out the elements so the pooled slice does not keep
	// references to the old entries.
	for i := 0; i < len(dents); i++ {
		dents[i] = nil
	}
	*p = dents[:0]
	direntSlicePool.Put(p)
}

// sortDirents sorts dents in place according to mode:
//
//   - SortLexical orders entries by name.
//   - SortFilesFirst orders regular files first, then entries that are
//     neither regular files nor directories, then directories.
//   - SortDirsFirst orders directories first, then regular files, then
//     all remaining entries.
//
// Entries that fall into the same group are ordered by name. Any other
// mode leaves dents untouched.
func sortDirents(mode SortMode, dents []DirEntry) {
	if len(dents) < 2 {
		return
	}

	// rank assigns each entry to a group; lower ranks sort first.
	var rank func(d DirEntry) int
	switch mode {
	case SortLexical:
		rank = func(DirEntry) int { return 0 }
	case SortFilesFirst:
		rank = func(d DirEntry) int {
			typ := d.Type()
			if typ.IsRegular() {
				return 0
			}
			if typ.IsDir() {
				return 2
			}
			return 1
		}
	case SortDirsFirst:
		rank = func(d DirEntry) int {
			typ := d.Type()
			if typ.IsDir() {
				return 0
			}
			if typ.IsRegular() {
				return 1
			}
			return 2
		}
	default:
		return
	}

	slices.SortFunc(dents, func(a, b DirEntry) int {
		ra, rb := rank(a), rank(b)
		if ra < rb {
			return -1
		}
		if ra > rb {
			return 1
		}
		// Same group: fall back to the name.
		return strings.Compare(a.Name(), b.Name())
	})
}
131 changes: 131 additions & 0 deletions dirent_portable_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
//go:build !darwin && !(aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris)

package fastwalk

import (
"io/fs"
"math/rand"
"reflect"
"testing"
"time"
)

var _ DirEntry = dirEntry{}

// dirEntry is a minimal DirEntry for testing. It only carries a name and
// a file mode; Info and Stat are not implemented and panic when called.
type dirEntry struct {
	name string
	typ  fs.FileMode
}

// Name returns the entry's name.
func (e dirEntry) Name() string {
	return e.name
}

// IsDir reports whether the directory bit is set in the entry's mode.
func (e dirEntry) IsDir() bool {
	return e.typ&fs.ModeDir != 0
}

// Type returns only the type bits of the entry's mode.
func (e dirEntry) Type() fs.FileMode {
	return e.typ & fs.ModeType
}

// Info is not implemented and always panics.
func (e dirEntry) Info() (fs.FileInfo, error) {
	panic("not implemented")
}

// Stat is not implemented and always panics.
func (e dirEntry) Stat() (fs.FileInfo, error) {
	panic("not implemented")
}

// String returns the entry formatted by fs.FormatDirEntry.
func (e dirEntry) String() string {
	return fs.FormatDirEntry(e)
}

// TestSortDirents checks that sortDirents leaves its input untouched for
// SortNone and that every other sort mode restores the expected order
// after the entries have been randomly shuffled.
//
// NB: this must be kept in sync with the
// TestSortDirents in dirent_unix_test.go
func TestSortDirents(t *testing.T) {
	direntNames := func(dents []DirEntry) []string {
		names := make([]string, len(dents))
		for i, d := range dents {
			names[i] = d.Name()
		}
		return names
	}

	t.Run("None", func(t *testing.T) {
		dents := []DirEntry{
			dirEntry{name: "b"},
			dirEntry{name: "a"},
			dirEntry{name: "d"},
			dirEntry{name: "c"},
		}
		want := direntNames(dents)
		sortDirents(SortNone, dents)
		got := direntNames(dents)
		if !reflect.DeepEqual(got, want) {
			t.Errorf("got: %q want: %q", got, want)
		}
	})

	// The shuffles are seeded from the clock. Log the seed so that a
	// failure caused by a particular shuffle can be reproduced.
	seed := time.Now().UnixNano()
	t.Logf("shuffle seed: %d", seed)
	rr := rand.New(rand.NewSource(seed))
	shuffleDirents := func(dents []DirEntry) []DirEntry {
		rr.Shuffle(len(dents), func(i, j int) {
			dents[i], dents[j] = dents[j], dents[i]
		})
		return dents
	}

	// dents needs to be in the expected order
	test := func(t *testing.T, dents []DirEntry, mode SortMode) {
		want := direntNames(dents)
		// Run multiple times with different shuffles
		for i := 0; i < 10; i++ {
			t.Run("", func(t *testing.T) {
				// The shuffle is in place, so sorting must restore
				// dents to the order captured in want.
				sortDirents(mode, shuffleDirents(dents))
				got := direntNames(dents)
				if !reflect.DeepEqual(got, want) {
					t.Errorf("got: %q want: %q", got, want)
				}
			})
		}
	}

	t.Run("Lexical", func(t *testing.T) {
		dents := []DirEntry{
			dirEntry{name: "a"},
			dirEntry{name: "b"},
			dirEntry{name: "c"},
			dirEntry{name: "d"},
		}
		test(t, dents, SortLexical)
	})

	t.Run("FilesFirst", func(t *testing.T) {
		dents := []DirEntry{
			// Files lexically
			dirEntry{name: "f1", typ: 0},
			dirEntry{name: "f2", typ: 0},
			dirEntry{name: "f3", typ: 0},
			// Non-dirs lexically
			dirEntry{name: "a1", typ: fs.ModeSymlink},
			dirEntry{name: "a2", typ: fs.ModeSymlink},
			dirEntry{name: "a3", typ: fs.ModeSymlink},
			dirEntry{name: "s1", typ: fs.ModeSocket},
			dirEntry{name: "s2", typ: fs.ModeSocket},
			dirEntry{name: "s3", typ: fs.ModeSocket},
			// Dirs lexically
			dirEntry{name: "d1", typ: fs.ModeDir},
			dirEntry{name: "d2", typ: fs.ModeDir},
			dirEntry{name: "d3", typ: fs.ModeDir},
		}
		test(t, dents, SortFilesFirst)
	})

	t.Run("DirsFirst", func(t *testing.T) {
		dents := []DirEntry{
			// Dirs lexically
			dirEntry{name: "d1", typ: fs.ModeDir},
			dirEntry{name: "d2", typ: fs.ModeDir},
			dirEntry{name: "d3", typ: fs.ModeDir},
			// Files lexically
			dirEntry{name: "f1", typ: 0},
			dirEntry{name: "f2", typ: 0},
			dirEntry{name: "f3", typ: 0},
			// Non-dirs lexically
			dirEntry{name: "a1", typ: fs.ModeSymlink},
			dirEntry{name: "a2", typ: fs.ModeSymlink},
			dirEntry{name: "a3", typ: fs.ModeSymlink},
			dirEntry{name: "s1", typ: fs.ModeSocket},
			dirEntry{name: "s2", typ: fs.ModeSocket},
			dirEntry{name: "s3", typ: fs.ModeSocket},
		}
		test(t, dents, SortDirsFirst)
	})
}
Loading

0 comments on commit 18d517e

Please sign in to comment.