-
Notifications
You must be signed in to change notification settings - Fork 1.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
byteSliceReader compatibility with flate.Reader interface #978
Conversation
Add compatibility with flate.Reader to reduce allocations of bufio.NewReader structs backed by default size slices.
This is going to increase allocation. It'd be better to use: |
I don't think this is going to cause an allocation. The same backing array will be used and the data pointer in slice will just be overwritten. I had no idea |
Right! Though the content was appended at the end of the slice. |
Also looks like type slicer []byte
func (z *slicer) Read(p []byte) (int, error) {
if len(*z) == 0 {
return 0, io.EOF
}
n := copy(p, *z)
*z = (*z)[n:]
return n, nil
}
func (z *slicer) ReadByte() (byte, error) {
if len(*z) == 0 {
return 0, io.EOF
}
n := (*z)[0]
*z = (*z)[1:]
return n, nil
} Should perform a few nanoseconds better than a struct: func BenchmarkBytesReadSlice(b *testing.B) {
zb := make([]byte, 1024)
z := bytes.Repeat([]byte("a"), 1024 * 1024)
b.Run("ReadByteStruct", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
b := &byteSliceReader{z}
ReadByte(b)
}
})
b.Run("ReadByteSlice", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
b := slicer(z)
ReadByte(&b)
}
})
b.Run("BytesBufferReadSlice", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
b := bytes.NewReader(z)
ReadByte(b)
}
})
b.Run("BytesBufferReadSlice", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
b := &byteSliceReader2{z, 0}
ReadByte(b)
}
})
b.Run("ReadStruct", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
b := &byteSliceReader{z}
Read(b, zb)
}
})
b.Run("ReadSlice", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
b := slicer(z)
Read(&b, zb)
}
})
b.Run("BytesBufferRead", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
b := bytes.NewReader(z)
Read(b, zb)
}
})
b.Run("BytesBufferRead2", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
b := &byteSliceReader2{z, 0}
Read(b, zb)
}
})
}
type byteSliceReader2 struct {
b []byte
i int64
}
func (r *byteSliceReader2) Read(b []byte) (n int, err error) {
if r.i >= int64(len(r.b)) {
return 0, io.EOF
}
n = copy(b, r.b[r.i:])
r.i += int64(n)
return n, nil
}
func (r *byteSliceReader2) ReadByte() (byte, error) {
if r.i >= int64(len(r.b)) {
return 0, io.EOF
}
b := r.b[r.i]
r.i++
return b, nil
}
func Read(r flate.Reader, zb []byte) {
for {
_, err := r.Read(zb)
if err == io.EOF {
return
}
}
}
func ReadByte(r flate.Reader) {
for {
_, err := r.ReadByte()
if err == io.EOF {
return
}
}
}
|
@moredure I think few nanos doesn't make that much of a difference, but I think cache-wise should be better right? I don't know exactly how Golang manages memory, but having a pointer to a struct looks like it makes the computer perform 2 lookups (?). One lookup for the pointer where the structure is and then another lookup for the pointer to the slice. |
Anyway there will be a need to get address (&) of slicer instance to allow passing to interface function parameters (io.Reader, flate.Reader, etc) |
If flate readers using ReadByte frequently maybe bytes.NewReader is better) than custom solution) |
In the end type byteSliceReader2 struct {
b []byte
i int64
}
func (r *byteSliceReader2) Read(b []byte) (n int, err error) {
if r.i >= int64(len(r.b)) {
return 0, io.EOF
}
n = copy(b, r.b[r.i:])
r.i += int64(n)
return n, nil
}
func (r *byteSliceReader2) ReadByte() (byte, error) {
if r.i >= int64(len(r.b)) {
return 0, io.EOF
}
b := r.b[r.i]
r.i++
return b, nil
} same code as bytes.Reader but without prevRune) in case of ReadByte only payload performs best, not sure if it fits flate.Reader usage |
I think the current change is good enough and is in line with how the rest works. If we want to try and speed up byteSliceReader as a whole that should be a different pull request. |
Add compatibility with flate.Reader to reduce allocations of bufio.NewReader structs backed by default size slices of 4096 bytes for WriteGunzip, WriteDeflate.