Skip to content

Commit

Permalink
Correct blockReader.Skip logic
Browse files Browse the repository at this point in the history
The existing logic had a rare edge case where it would seek incorrectly. Most of
the time, it wouldn't skip at all.
  • Loading branch information
colinmarc committed Jul 15, 2023
1 parent be7d224 commit 13d9368
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 26 deletions.
23 changes: 9 additions & 14 deletions file_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,25 +156,20 @@ func (f *FileReader) Seek(offset int64, whence int) (int64, error) {
return f.offset, fmt.Errorf("invalid resulting offset: %d", off)
}

if f.offset != off {
f.offset = off

if f.blockReader != nil {
// If the seek is within the next few chunks, it's much more
// efficient to throw away a few bytes than to reconnect and start
// a read at the new offset.
err := f.blockReader.Skip(f.offset - f.blockReader.Offset)
if err == nil {
return f.offset, nil
}

// It isn't possible to seek within the current block, so reset such
// that we can connect to the new block.
if f.blockReader != nil {
// If the seek is within the next few chunks, it's much more
// efficient to throw away a few bytes than to reconnect and start
// a read at the new offset.
err := f.blockReader.Skip(off - f.offset)
if err != nil {
// It isn't possible to skip forward in the current block, so reset such
// that we can reconnect at the new offset.
f.blockReader.Close()
f.blockReader = nil
}
}

f.offset = off
return f.offset, nil
}

Expand Down
42 changes: 37 additions & 5 deletions file_reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,19 @@ const (
testStr = "Abominable are the tumblers into which he pours his poison."
testStrOff = 48847

testStr2 = "tumblers"
testStr2Off = 48866
testStr2RelativeOff = 19
testStr2 = "tumblers"
testStr2Off = 48866

testStr3 = "http://www.gutenberg.org"
testStr3Off = 1256988
testStr3NegativeOff = -288

testStr4 = "Moby Dick"
testStr4Off = 34

testStr5 = "LEVIATHAN."
testStr5Off = 8234

testChecksum = "27c076e4987344253650d3335a5d08ce"
)

Expand Down Expand Up @@ -250,7 +255,7 @@ func TestFileSeek(t *testing.T) {
assert.EqualValues(t, testStrOff, off)
br := file.blockReader

off, err = file.Seek(testStr2RelativeOff, 1)
off, err = file.Seek((testStr2Off - testStrOff), 1)
assert.NoError(t, err)
assert.EqualValues(t, testStr2Off, off)

Expand All @@ -263,7 +268,7 @@ func TestFileSeek(t *testing.T) {
assert.EqualValues(t, len(testStr2), n)
assert.EqualValues(t, testStr2, string(buf.Bytes()))

// now seek forward to another block and read a string
// Now seek forward to another block and read.
off, err = file.Seek(testStr3NegativeOff, 2)
assert.NoError(t, err)
assert.EqualValues(t, testStr3Off, off)
Expand All @@ -275,6 +280,33 @@ func TestFileSeek(t *testing.T) {
assert.EqualValues(t, testStr3, string(buf.Bytes()))
}

func TestFileSeekReadSkip(t *testing.T) {
client := getClient(t)

file, err := client.Open("/_test/mobydick.txt")
require.NoError(t, err)

buf := make([]byte, len(testStr4))
n, err := file.ReadAt(buf, testStr4Off)
assert.NoError(t, err)
assert.Equal(t, len(buf), n)
assert.Equal(t, testStr4, string(buf))
br := file.blockReader

off, err := file.Seek(testStr5Off, 0)
assert.NoError(t, err)
assert.EqualValues(t, testStr5Off, off)

// Make sure we didn't reconnect.
assert.Equal(t, br, file.blockReader)

buf = make([]byte, len(testStr5))
n, err = io.ReadFull(file, buf)
assert.NoError(t, err)
assert.Equal(t, len(buf), n)
assert.Equal(t, testStr5, string(buf))
}

func TestFileReadDir(t *testing.T) {
client := getClient(t)

Expand Down
13 changes: 6 additions & 7 deletions internal/transfer/block_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,18 +120,17 @@ func (br *BlockReader) Read(b []byte) (int, error) {
// Skip attempts to discard bytes in the stream in order to skip forward. This
// is an optimization for the case that the amount to skip is very small. It
// returns an error if skip was not attempted at all (because the BlockReader
// isn't connected, or the offset is out of bounds or too far ahead) or the seek
// failed for some other reason.
func (br *BlockReader) Skip(off int64) error {
// isn't connected, or the resulting offset would be out of bounds or too far
// ahead) or the copy failed for some other reason.
func (br *BlockReader) Skip(n int64) error {
blockSize := int64(br.Block.GetB().GetNumBytes())
amountToSkip := off - br.Offset
resultingOffset := br.Offset + n

if br.stream == nil || off < 0 || off >= blockSize ||
amountToSkip < 0 || amountToSkip > maxSkip {
if br.stream == nil || n <= 0 || n > maxSkip || resultingOffset >= blockSize {
return errors.New("unable to skip")
}

_, err := io.CopyN(io.Discard, br.stream, amountToSkip)
_, err := io.CopyN(io.Discard, br.stream, n)
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
Expand Down

0 comments on commit 13d9368

Please sign in to comment.