diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 0b799cb12be..1d5cafcb155 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -101,6 +101,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Fix storage metricset to allow config without region/zone. {issue}17623[17623] {pull}17624[17624] - Fix overflow on Prometheus rates when new buckets are added on the go. {pull}17753[17753] - Add a switch to the driver definition on SQL module to use pretty names {pull}17378[17378] +- The Kibana collector applies backoff when errored at getting usage stats {pull}20772[20772] - The `elasticsearch/index` metricset only requests wildcard expansion for hidden indices if the monitored Elasticsearch cluster supports it. {pull}20938[20938] - Fix panic index out of range error when getting AWS account name. {pull}21101[21101] {issue}21095[21095] - Handle missing counters in the application_pool metricset. {pull}21071[21071] diff --git a/metricbeat/module/kibana/stats/stats.go b/metricbeat/module/kibana/stats/stats.go index 526a525951c..a6e19d50f42 100644 --- a/metricbeat/module/kibana/stats/stats.go +++ b/metricbeat/module/kibana/stats/stats.go @@ -38,9 +38,10 @@ func init() { } const ( - statsPath = "api/stats" - settingsPath = "api/settings" - usageCollectionPeriod = 24 * time.Hour + statsPath = "api/stats" + settingsPath = "api/settings" + usageCollectionPeriod = 24 * time.Hour + usageCollectionBackoff = 1 * time.Hour ) var ( @@ -57,6 +58,7 @@ type MetricSet struct { statsHTTP *helper.HTTP settingsHTTP *helper.HTTP usageLastCollectedOn time.Time + usageNextCollectOn time.Time isUsageExcludable bool } @@ -165,6 +167,10 @@ func (m *MetricSet) fetchStats(r mb.ReporterV2, now time.Time) error { content, err = m.statsHTTP.FetchContent() if err != nil { + if shouldCollectUsage { + // When errored in collecting the usage stats it may be counterproductive to try again on the next poll, try to collect the stats again after usageCollectionBackoff + m.usageNextCollectOn = now.Add(usageCollectionBackoff) + } return err } @@ -215,5 +221,5 @@ func (m *MetricSet) calculateIntervalMs() int64 { } func (m *MetricSet) shouldCollectUsage(now time.Time) bool { - return now.Sub(m.usageLastCollectedOn) > usageCollectionPeriod + return now.Sub(m.usageLastCollectedOn) > usageCollectionPeriod && now.Sub(m.usageNextCollectOn) > 0 } diff --git a/metricbeat/module/kibana/stats/stats_test.go b/metricbeat/module/kibana/stats/stats_test.go index b6757e5ecb2..56cbfc17e1f 100644 --- a/metricbeat/module/kibana/stats/stats_test.go +++ b/metricbeat/module/kibana/stats/stats_test.go @@ -23,6 +23,7 @@ import ( "net/http" "net/http/httptest" "testing" + "time" "github.com/stretchr/testify/require" @@ -48,12 +49,12 @@ func TestFetchUsage(t *testing.T) { w.WriteHeader(503) case 1: // second call - // Make sure exclude_usage is still false since first call failed - require.Equal(t, "false", excludeUsage) + // Make sure exclude_usage is true since first call failed and it should not try again until usageCollectionBackoff time has passed + require.Equal(t, "true", excludeUsage) w.WriteHeader(200) case 2: // third call - // Make sure exclude_usage is now true since second call succeeded + // Make sure exclude_usage is still true require.Equal(t, "true", excludeUsage) w.WriteHeader(200) } @@ -76,3 +77,40 @@ func TestFetchUsage(t *testing.T) { // Third fetch mbtest.ReportingFetchV2Error(f) } + +func TestShouldCollectUsage(t *testing.T) { + now := time.Now() + + cases := map[string]struct { + usageLastCollectedOn time.Time + usageNextCollectOn time.Time + expectedResult bool + }{ + "within_usage_collection_period": { + usageLastCollectedOn: now.Add(-1 * usageCollectionPeriod), + expectedResult: false, + }, + "after_usage_collection_period_but_before_next_scheduled_collection": { + usageLastCollectedOn: now.Add(-2 * usageCollectionPeriod), + usageNextCollectOn: now.Add(3 * time.Hour), + expectedResult: false, + }, + "after_usage_collection_period_and_after_next_scheduled_collection": { + usageLastCollectedOn: now.Add(-2 * usageCollectionPeriod), + usageNextCollectOn: now.Add(-1 * time.Hour), + expectedResult: true, + }, + } + + for name, test := range cases { + t.Run(name, func(t *testing.T) { + m := MetricSet{ + usageLastCollectedOn: test.usageLastCollectedOn, + usageNextCollectOn: test.usageNextCollectOn, + } + + actualResult := m.shouldCollectUsage(now) + require.Equal(t, test.expectedResult, actualResult) + }) + } +}