Skip to content

Commit

Permalink
feat: add function to parse HTTP header parameters (valyala#1685)
Browse files Browse the repository at this point in the history
* feat: add function to parse HTTP header parameters

The implementation is based on RFC-9110 5.6.6.

* test: add fuzz for VisitHeaderParams
  • Loading branch information
nickajacks1 authored and Max-Cheng committed Feb 12, 2024
1 parent 9ba1646 commit 9eee8cb
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 0 deletions.
98 changes: 98 additions & 0 deletions header.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,104 @@ func (h *ResponseHeader) AddTrailerBytes(trailer []byte) error {
return err
}

// validHeaderFieldByte reports whether c is a tchar, i.e. a byte that may
// appear in an HTTP token as defined by section 5.6.2 of [RFC9110].
// Bytes outside the 7-bit ASCII range are never valid.
func validHeaderFieldByte(c byte) bool {
	if c >= 128 {
		// tcharTable has exactly 128 entries; anything above is not a tchar.
		return false
	}
	return tcharTable[c]
}

// tcharTable marks, for every 7-bit ASCII byte, whether it is a token
// character:
//
//	tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
//	      / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
//	      / DIGIT / ALPHA
//
// See: https://www.rfc-editor.org/rfc/rfc9110#tokens
var tcharTable = func() (table [128]bool) {
	const tchars = "!#$%&'*+-.^_`|~" +
		"0123456789" +
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
		"abcdefghijklmnopqrstuvwxyz"
	for i := 0; i < len(tchars); i++ {
		table[tchars[i]] = true
	}
	return table
}()

// VisitHeaderParams calls f for each parameter in the given header bytes.
// It stops processing when f returns false or an invalid parameter is found.
//
// Everything before the first ';' is skipped. Optional whitespace (spaces and
// horizontal tabs) is accepted between a ';' and the parameter name. A
// parameter must have the form name=value with no whitespace around '='.
// Bytes between the end of a parameter and the next ';' are ignored.
//
// Parameter values may be quoted, in which case \ is treated as an escape
// character when locating the closing quote. The surrounding quotes are
// stripped before the value is passed to f, but escape sequences inside the
// value are passed through unchanged.
// See: https://www.rfc-editor.org/rfc/rfc9110#section-5.6.6
//
// f must not retain references to key and/or value after returning.
// Copy key and/or value contents before returning if you need to retain them.
func VisitHeaderParams(b []byte, f func(key, value []byte) bool) {
	for len(b) > 0 {
		// Advance to the next parameter separator.
		idxSemi := 0
		for idxSemi < len(b) && b[idxSemi] != ';' {
			idxSemi++
		}
		if idxSemi >= len(b) {
			return
		}
		b = b[idxSemi+1:]

		// Skip optional whitespace: OWS = *( SP / HTAB ).
		for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
			b = b[1:]
		}

		// The parameter name is a non-empty token.
		n := 0
		if len(b) == 0 || !validHeaderFieldByte(b[n]) {
			return
		}
		n++
		for n < len(b) && validHeaderFieldByte(b[n]) {
			n++
		}

		// The name must be followed by '=' and at least one more byte,
		// which also guarantees that b[n] below is in range.
		if n >= len(b)-1 || b[n] != '=' {
			return
		}
		param := b[:n]
		n++

		switch {
		case validHeaderFieldByte(b[n]):
			// Unquoted value: a token.
			m := n
			n++
			for n < len(b) && validHeaderFieldByte(b[n]) {
				n++
			}
			if !f(param, b[m:n]) {
				return
			}
		case b[n] == '"':
			// Quoted value: scan to the first unescaped '"'.
			foundEndQuote := false
			escaping := false
			n++
			m := n
			for ; n < len(b); n++ {
				if b[n] == '"' && !escaping {
					foundEndQuote = true
					break
				}
				// A backslash escapes the next byte unless it is itself escaped.
				escaping = (b[n] == '\\' && !escaping)
			}
			if !foundEndQuote {
				return
			}
			if !f(param, b[m:n]) {
				return
			}
			n++ // step over the closing quote
		default:
			return
		}
		b = b[n:]
	}
}

// MultipartFormBoundary returns boundary part
// from 'multipart/form-data; boundary=...' Content-Type.
func (h *RequestHeader) MultipartFormBoundary() []byte {
Expand Down
69 changes: 69 additions & 0 deletions header_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,75 @@ func testRequestHeaderHasAcceptEncoding(t *testing.T, ae, v string, resultExpect
}
}

// TestVisitHeaderParams checks VisitHeaderParams against well-formed and
// malformed parameter lists, and verifies that iteration stops as soon as
// the callback returns false.
func TestVisitHeaderParams(t *testing.T) {
	t.Parallel()

	cases := []struct {
		header string
		want   [][2]string
	}{
		{"text/plain;charset=utf-8;q=0.39", [][2]string{{"charset", "utf-8"}, {"q", "0.39"}}},
		{"text/plain; foo=bar ;", [][2]string{{"foo", "bar"}}},
		{`text/plain; foo="bar"; `, [][2]string{{"foo", "bar"}}},
		{`text/plain; foo="text/plain,text/html;charset=\"utf-8\""`, [][2]string{{"foo", `text/plain,text/html;charset=\"utf-8\"`}}},
		// Malformed inputs: no parameter may be reported.
		{"text/plain foo=bar", nil},
		{"text/plain;", nil},
		{"text/plain; ", nil},
		{"text/plain; foo", nil},
		{"text/plain; foo=", nil},
		{"text/plain; =bar", nil},
		{"text/plain; foo = bar", nil},
		{`text/plain; foo="bar`, nil},
		{"text/plain;;foo=bar", nil},
	}
	for _, tc := range cases {
		testVisitHeaderParams(t, tc.header, tc.want)
	}

	// Returning false from the callback must stop the visit immediately.
	var seen [][2]string
	VisitHeaderParams([]byte(`text/plain; foo=bar; charset=utf-8`), func(key, value []byte) bool {
		seen = append(seen, [2]string{string(key), string(value)})
		return !bytes.Equal(key, []byte("foo"))
	})

	if len(seen) != 1 {
		t.Fatalf("expected 1 HTTP parameter, parsed %v", len(seen))
	}

	want := [2]string{"foo", "bar"}
	if seen[0] != want {
		t.Fatalf("unexpected parameter %v=%v. Expecting foo=bar", seen[0][0], seen[0][1])
	}
}

// testVisitHeaderParams runs VisitHeaderParams over header, collecting every
// visited key/value pair, and fails the test unless the collected pairs equal
// expectedParams in both number and order.
func testVisitHeaderParams(t *testing.T, header string, expectedParams [][2]string) {
	// Report failures at the calling test case rather than inside this helper.
	t.Helper()

	var parsed [][2]string
	VisitHeaderParams([]byte(header), func(key, value []byte) bool {
		// key and value are converted to strings here because they must not
		// be retained after the callback returns.
		parsed = append(parsed, [2]string{string(key), string(value)})
		return true
	})

	if len(parsed) != len(expectedParams) {
		t.Fatalf("expected %v HTTP parameters, parsed %v", len(expectedParams), len(parsed))
	}

	for i := range expectedParams {
		if expectedParams[i] != parsed[i] {
			t.Fatalf("unexpected parameter %v=%v. Expecting %v=%v", parsed[i][0], parsed[i][1], expectedParams[i][0], expectedParams[i][1])
		}
	}
}

// FuzzVisitHeaderParams feeds arbitrary bytes to VisitHeaderParams and
// reports an error whenever the callback is invoked with an empty key.
func FuzzVisitHeaderParams(f *testing.F) {
	// Seed corpus: plain parameters, quoted values, and escape-heavy input.
	for _, seed := range [...]string{
		`application/json; v=1; foo=bar; q=0.938; param=param; param="big fox"; q=0.43`,
		`*/*`,
		`\\`,
		`text/plain; foo="\\\"\'\\''\'"`,
	} {
		f.Add([]byte(seed))
	}

	f.Fuzz(func(t *testing.T, header []byte) {
		onParam := func(key, _ []byte) bool {
			if len(key) == 0 {
				t.Errorf("Unexpected length zero parameter, failed input was: %s", header)
			}
			return true
		}
		VisitHeaderParams(header, onParam)
	})
}

func TestRequestMultipartFormBoundary(t *testing.T) {
t.Parallel()

Expand Down
13 changes: 13 additions & 0 deletions header_timing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,19 @@ func benchmarkNormalizeHeaderKey(b *testing.B, src []byte) {
})
}

// BenchmarkVisitHeaderParams measures VisitHeaderParams over a Content-Type
// value that mixes token parameters with a quoted parameter containing
// escaped quotes. The value is stored in and read back from a RequestHeader.
func BenchmarkVisitHeaderParams(b *testing.B) {
	var reqHeader RequestHeader
	reqHeader.SetBytesKV(strContentType, []byte(`text/plain ; foo=bar ; param2="dquote is: [\"], ok?" ; version=1; q=0.324 `))
	contentType := reqHeader.ContentType()

	// Visit every parameter; the callback never stops the iteration.
	acceptAll := func(key, value []byte) bool { return true }

	b.ReportAllocs()
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		VisitHeaderParams(contentType, acceptAll)
	}
}

func BenchmarkRemoveNewLines(b *testing.B) {
type testcase struct {
value string
Expand Down

0 comments on commit 9eee8cb

Please sign in to comment.