From e437f90f1ab5e3419ed12d5559fcc8d3cc9773b7 Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Wed, 18 Oct 2023 17:11:00 -0400 Subject: [PATCH] [receiver/filelog] Implement specifying top n files to track when ordering (#27844) **Description:** * Add a new `ordering_criteria.top_n` option, which allows a user to specify the number of files to track after ordering. * Default is 1, which was the existing behavior. **Link to tracking Issue:** #23788 **Testing:** Unit tests added. **Documentation:** Added new parameter to existing documentation. --- .chloggen/feat_top_n_file_sorting.yaml | 22 ++++ pkg/stanza/fileconsumer/config_test.go | 10 ++ pkg/stanza/fileconsumer/matcher/matcher.go | 23 +++- .../fileconsumer/matcher/matcher_test.go | 114 ++++++++++++++++++ pkg/stanza/fileconsumer/testdata/config.yaml | 4 + receiver/filelogreceiver/README.md | 1 + 6 files changed, 171 insertions(+), 3 deletions(-) create mode 100755 .chloggen/feat_top_n_file_sorting.yaml diff --git a/.chloggen/feat_top_n_file_sorting.yaml b/.chloggen/feat_top_n_file_sorting.yaml new file mode 100755 index 000000000000..1a4e678bae36 --- /dev/null +++ b/.chloggen/feat_top_n_file_sorting.yaml @@ -0,0 +1,22 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: "enhancement" + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: filelogreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add a new "top_n" option to specify the number of files to track when using ordering criteria + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [23788] + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: ["user"] diff --git a/pkg/stanza/fileconsumer/config_test.go b/pkg/stanza/fileconsumer/config_test.go index 9d83118aa4bd..43171be5c96c 100644 --- a/pkg/stanza/fileconsumer/config_test.go +++ b/pkg/stanza/fileconsumer/config_test.go @@ -412,6 +412,16 @@ func TestUnmarshal(t *testing.T) { return newMockOperatorConfig(cfg) }(), }, + { + Name: "ordering_criteria_top_n", + Expect: func() *mockOperatorConfig { + cfg := NewConfig() + cfg.OrderingCriteria = matcher.OrderingCriteria{ + TopN: 10, + } + return newMockOperatorConfig(cfg) + }(), + }, }, }.Run(t) } diff --git a/pkg/stanza/fileconsumer/matcher/matcher.go b/pkg/stanza/fileconsumer/matcher/matcher.go index 0a7a0628edac..76cdd1bd4feb 100644 --- a/pkg/stanza/fileconsumer/matcher/matcher.go +++ b/pkg/stanza/fileconsumer/matcher/matcher.go @@ -18,6 +18,10 @@ const ( sortTypeAlphabetical = "alphabetical" ) +const ( + defaultOrderingCriteriaTopN = 1 +) + type Criteria struct { Include []string `mapstructure:"include,omitempty"` Exclude []string `mapstructure:"exclude,omitempty"` @@ -26,6 +30,7 @@ type Criteria struct { type OrderingCriteria struct { Regex string `mapstructure:"regex,omitempty"` + TopN int `mapstructure:"top_n,omitempty"` SortBy []Sort `mapstructure:"sort_by,omitempty"` } @@ -62,6 +67,14 @@ func New(c Criteria) (*Matcher, error) { return nil, fmt.Errorf("'regex' must be specified when 'sort_by' is specified") } + if c.OrderingCriteria.TopN < 0 { + return nil, fmt.Errorf("'top_n' must be a positive integer") + } + + if c.OrderingCriteria.TopN == 0 { + c.OrderingCriteria.TopN = defaultOrderingCriteriaTopN + } + regex, err := regexp.Compile(c.OrderingCriteria.Regex) if err != nil { return nil, fmt.Errorf("compile regex: %w", err) @@ -97,6 +110,7 @@ func New(c Criteria) (*Matcher, error) { include: c.Include, exclude: c.Exclude, regex: regex, + topN: c.OrderingCriteria.TopN, filterOpts: filterOpts, }, nil } @@ -105,6 +119,7 @@ type Matcher struct { include []string exclude []string regex *regexp.Regexp + topN int filterOpts []filter.Option } @@ -127,7 +142,9 @@ func (m Matcher) MatchFiles() ([]string, error) { return result, errors.Join(err, errs) } - // Return only the first item. - // See https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/23788 - return result[:1], errors.Join(err, errs) + if len(result) <= m.topN { + return result, errors.Join(err, errs) + } + + return result[:m.topN], errors.Join(err, errs) } diff --git a/pkg/stanza/fileconsumer/matcher/matcher_test.go b/pkg/stanza/fileconsumer/matcher/matcher_test.go index c838962a4699..1d9de6f17f87 100644 --- a/pkg/stanza/fileconsumer/matcher/matcher_test.go +++ b/pkg/stanza/fileconsumer/matcher/matcher_test.go @@ -98,6 +98,23 @@ func TestNew(t *testing.T) { }, expectedErr: "compile regex: error parsing regexp: missing closing ]: `[a-z`", }, + { + name: "TopN is negative", + criteria: Criteria{ + Include: []string{"*.log"}, + OrderingCriteria: OrderingCriteria{ + Regex: "[a-z]", + TopN: -1, + SortBy: []Sort{ + { + SortType: "numeric", + RegexKey: "key", + }, + }, + }, + }, + expectedErr: "'top_n' must be a positive integer", + }, { name: "SortTypeEmpty", criteria: Criteria{ @@ -249,6 +266,46 @@ func TestMatcher(t *testing.T) { }, expected: []string{"err.2023020612.log"}, }, + { + name: "TopN > number of files", + files: []string{"err.2023020611.log", "err.2023020612.log"}, + include: []string{"err.*.log"}, + exclude: []string{}, + filterCriteria: OrderingCriteria{ + Regex: `err\.(?P\d{4}\d{2}\d{2}\d{2}).*log`, + TopN: 3, + SortBy: []Sort{ + { + SortType: sortTypeTimestamp, + RegexKey: "value", + Ascending: false, + Location: "UTC", + Layout: `%Y%m%d%H`, + }, + }, + }, + expected: []string{"err.2023020612.log", "err.2023020611.log"}, + }, + { + name: "TopN == number of files", + files: []string{"err.2023020611.log", "err.2023020612.log"}, + include: []string{"err.*.log"}, + exclude: []string{}, + filterCriteria: OrderingCriteria{ + Regex: `err\.(?P\d{4}\d{2}\d{2}\d{2}).*log`, + TopN: 2, + SortBy: []Sort{ + { + SortType: sortTypeTimestamp, + RegexKey: "value", + Ascending: false, + Location: "UTC", + Layout: `%Y%m%d%H`, + }, + }, + }, + expected: []string{"err.2023020612.log", "err.2023020611.log"}, + }, { name: "Timestamp Sorting Ascending", files: []string{"err.2023020612.log", "err.2023020611.log", "err.2023020609.log", "err.2023020610.log"}, @@ -319,6 +376,24 @@ func TestMatcher(t *testing.T) { }, expected: []string{"err.d.log"}, }, + { + name: "Alphabetical Sorting - Top 2", + files: []string{"err.a.log", "err.d.log", "err.b.log", "err.c.log"}, + include: []string{"err.*.log"}, + exclude: []string{}, + filterCriteria: OrderingCriteria{ + Regex: `err\.(?P[a-zA-Z]+).*log`, + TopN: 2, + SortBy: []Sort{ + { + SortType: sortTypeAlphabetical, + RegexKey: "value", + Ascending: false, + }, + }, + }, + expected: []string{"err.d.log", "err.c.log"}, + }, { name: "Alphabetical Sorting Ascending", files: []string{"err.b.log", "err.a.log", "err.c.log", "err.d.log"}, @@ -336,6 +411,45 @@ func TestMatcher(t *testing.T) { }, expected: []string{"err.a.log"}, }, + { + name: "Multiple Sorting - timestamp priority sort - Top 4", + files: []string{ + "err.b.1.2023020601.log", + "err.b.2.2023020601.log", + "err.a.1.2023020601.log", + "err.a.2.2023020601.log", + "err.b.1.2023020602.log", + "err.a.2.2023020602.log", + "err.b.2.2023020602.log", + "err.a.1.2023020602.log", + }, + include: []string{"err.*.log"}, + exclude: []string{}, + filterCriteria: OrderingCriteria{ + Regex: `err\.(?P[a-zA-Z])\.(?P\d+)\.(?P