Skip to content

Commit

Permalink
cleanup and function doc comments
Browse files Browse the repository at this point in the history
  • Loading branch information
blaedj committed Feb 3, 2021
1 parent 19d27f9 commit 0378080
Showing 1 changed file with 13 additions and 35 deletions.
48 changes: 13 additions & 35 deletions pkg/dataflatten/string_delimited.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@ import (
"strings"
)

// StringDelimited flattens rawdata by handing it, together with delimiter and
// any opts, to flattenStringDelimited and returning that call's results as-is.
func StringDelimited(rawdata []byte, delimiter string, opts ...FlattenOpts) ([]Row, error) {
	rows, err := flattenStringDelimited(rawdata, delimiter, opts...)
	return rows, err
}

// dataFunc is the signature of a function that takes raw data bytes plus
// optional FlattenOpts and returns the resulting rows or an error.
type dataFunc func(data []byte, opts ...FlattenOpts) ([]Row, error)

// StringDelimitedUnseparatedFunc returns a function that conforms to the
// function expected by dataflattentable.Table's execDataFunc property
// interface expected by dataflattentable.Table's execDataFunc property.
// Properties are grouped into a single record based on a 'duplicate key'
// strategy: If a key/value pair is encountered, and the record being built
// already has a value for that key, then that record is considered 'complete'.
// The record is stored in the collection, and a new record is started. This
// strategy is only suitable if the data output does not exclude k/v pairs with
// blank/missing values, and assumes that the properties for a single record are
// grouped together.
func StringDelimitedUnseparatedFunc(delimiter string) dataFunc {
return func(rawdata []byte, opts ...FlattenOpts) ([]Row, error) {
v := map[string]interface{}{}
Expand Down Expand Up @@ -47,36 +50,11 @@ func StringDelimitedUnseparatedFunc(delimiter string) dataFunc {
}
}

// StringDelimitedUnseparated will decide when to create a new record based on
// when a duplicate key is found.
//
// rawdata is scanned line by line and each line is split on the first
// occurrence of delimiter into a key and a value. Lines that do not contain
// the delimiter are skipped. When a key is seen that the record currently
// being built already holds, that record is stored and a new one is started.
// The collected records, keyed by their zero-based index rendered as a
// string, are passed to Flatten together with opts.
//
// NOTE(review): scanner.Err() is never checked, so a scan failure (for
// example a line longer than bufio.Scanner's default token limit) ends the
// loop without surfacing an error; confirm whether that is intended.
func StringDelimitedUnseparated(rawdata []byte, delimiter string, opts ...FlattenOpts) ([]Row, error) {
// v collects the finished records, keyed by their index as a string.
v := map[string]interface{}{}
scanner := bufio.NewScanner(bytes.NewReader(rawdata))
// row is the record currently being built.
row := map[string]interface{}{}
// i is the index under which the current record will be stored.
i := 0
for scanner.Scan() {
line := scanner.Text()
// Split on the first delimiter only, so a value may itself contain the
// delimiter.
parts := strings.SplitN(line, delimiter, 2)
if len(parts) < 2 {
// TODO: this is potentially problematic if the output doesn't
// include a separator for blank values. We need to record keys even
// if there is no value for the 'split records by duplicate keys'
// strategy to work.
// level.Debug(logger).Log("msg", "not enough parts to get a k/v pair", "line", line)
continue
}
// Only the value is trimmed; the key is used exactly as it appears.
key := parts[0]
value := strings.TrimSpace(parts[1])
if _, ok := row[key]; ok { // this key already exists, so we want to start a new record.
v[strconv.Itoa(i)] = row // record the results in the collection
i++
row = map[string]interface{}{} // reset the record
}
row[key] = value
}
// The last record is stored unconditionally, even when it is empty.
v[strconv.Itoa(i)] = row // store the final result

return Flatten(v, opts...)
// StringDelimited flattens rawdata that is assumed to hold the key-value pairs
// of exactly one record, with every k/v pair on its own line. It is useful for
// output that can easily be split into separate records before 'flattening'.
// The work is delegated to flattenStringDelimited.
func StringDelimited(rawdata []byte, delimiter string, opts ...FlattenOpts) ([]Row, error) {
	rows, err := flattenStringDelimited(rawdata, delimiter, opts...)
	return rows, err
}

func flattenStringDelimited(in []byte, delimiter string, opts ...FlattenOpts) ([]Row, error) {
Expand Down

0 comments on commit 0378080

Please sign in to comment.