From 599f75a481dc00d043966f2570436a481f8bf648 Mon Sep 17 00:00:00 2001 From: Chris Cowan Date: Mon, 28 Feb 2022 13:47:57 -0700 Subject: [PATCH] [Metrics UI] Increase composite size to 10K for Metric Threshold Rule and optimize processing (#121904) (#126506) * [Metrics UI] Increase composite size for Metric Threshold Rule to 10K * Adding performance optimizations * Fixing metrics_alerting integration test * fixing tests * Fixing integration test and config mock * Removing the setTimeout code to simplify to a for/of * Adding new setting to docs * Adding metric_threshold identifier to the config setting (cherry picked from commit ae0c8d5ecb6bbd6099b7ab57aaae5adea790490c) # Conflicts: # x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_rule.ts --- .../general-infra-logs-ui-settings.asciidoc | 5 +- .../metric_threshold/lib/evaluate_rule.ts | 61 +++++++++---------- .../metric_threshold/lib/metric_query.test.ts | 3 +- .../metric_threshold/lib/metric_query.ts | 5 +- .../metric_threshold_executor.test.ts | 5 ++ .../metric_threshold_executor.ts | 5 +- .../plugins/infra/server/lib/infra_types.ts | 2 +- .../infra/server/lib/sources/sources.test.ts | 5 ++ x-pack/plugins/infra/server/plugin.ts | 13 ++-- x-pack/plugins/infra/server/types.ts | 18 ++++++ .../apis/metrics_ui/metric_threshold_alert.ts | 59 +++++++++++++++--- .../apis/metrics_ui/metrics_alerting.ts | 5 +- 12 files changed, 133 insertions(+), 53 deletions(-) diff --git a/docs/settings/general-infra-logs-ui-settings.asciidoc b/docs/settings/general-infra-logs-ui-settings.asciidoc index d56c38f120170a..5fb20d6622b43c 100644 --- a/docs/settings/general-infra-logs-ui-settings.asciidoc +++ b/docs/settings/general-infra-logs-ui-settings.asciidoc @@ -21,4 +21,7 @@ Field used to identify hosts. Defaults to `host.name`. Field used to identify Docker containers. Defaults to `container.id`. `xpack.infra.sources.default.fields.pod`:: -Field used to identify Kubernetes pods. Defaults to `kubernetes.pod.uid`. \ No newline at end of file +Field used to identify Kubernetes pods. Defaults to `kubernetes.pod.uid`. + +`xpack.infra.alerting.metric_threshold.group_by_page_size`:: +Controls the size of the composite aggregations used by the Metric Threshold group by feature. Defaults to `10000`. \ No newline at end of file diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_rule.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_rule.ts index 0906ad48713f10..823a0ec21dd48b 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_rule.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_rule.ts @@ -6,8 +6,8 @@ */ import { ElasticsearchClient } from 'kibana/server'; -import { first, has, isNaN, isNumber, isObject, last, mapValues } from 'lodash'; import moment from 'moment'; +import { difference, mapValues, first, last, isNaN, isNumber, isObject, has } from 'lodash'; import { Aggregators, Comparator, @@ -68,6 +68,7 @@ export const evaluateRule = { const { criteria, groupBy, filterQuery, shouldDropPartialBuckets } = params; @@ -80,6 +81,7 @@ export const evaluateRule = !currentGroups.includes(g)); + const missingGroups = difference(prevGroups, currentGroups); + if (currentGroups.length === 0 && missingGroups.length === 0) { missingGroups.push(UNGROUPED_FACTORY_KEY); } const backfillTimestamp = last(last(Object.values(currentValues)))?.key ?? new Date().toISOString(); - const backfilledPrevGroups: Record< - string, - Array<{ key: string; value: number }> - > = missingGroups.reduce( - (result, group) => ({ - ...result, - [group]: [ - { - key: backfillTimestamp, - value: criterion.aggType === Aggregators.COUNT ? 0 : null, - }, - ], - }), - {} - ); + const backfilledPrevGroups: Record> = {}; + for (const group of missingGroups) { + backfilledPrevGroups[group] = [ + { + key: backfillTimestamp, + value: criterion.aggType === Aggregators.COUNT ? 0 : null, + }, + ]; + } const currentValuesWithBackfilledPrevGroups = { ...currentValues, ...backfilledPrevGroups, @@ -152,6 +149,7 @@ const getMetric: ( index: string, groupBy: string | undefined | string[], filterQuery: string | undefined, + compositeSize: number, timeframe?: { start?: number; end: number }, shouldDropPartialBuckets?: boolean ) => Promise>> = async function ( @@ -160,6 +158,7 @@ const getMetric: ( index, groupBy, filterQuery, + compositeSize, timeframe, shouldDropPartialBuckets ) { @@ -174,6 +173,7 @@ const getMetric: ( const searchBody = getElasticsearchMetricQuery( params, calculatedTimerange, + compositeSize, hasGroupBy ? groupBy : undefined, filterQuery ); @@ -204,21 +204,18 @@ const getMetric: ( bucketSelector, afterKeyHandler )) as Array; doc_count: number }>; - const groupedResults = compositeBuckets.reduce( - (result, bucket) => ({ - ...result, - [Object.values(bucket.key) - .map((value) => value) - .join(', ')]: getValuesFromAggregations( - bucket, - aggType, - dropPartialBucketsOptions, - calculatedTimerange, - bucket.doc_count - ), - }), - {} - ); + const groupedResults: Record = {}; + for (const bucket of compositeBuckets) { + const key = Object.values(bucket.key).join(', '); + const value = getValuesFromAggregations( + bucket, + aggType, + dropPartialBucketsOptions, + calculatedTimerange, + bucket.doc_count + ); + groupedResults[key] = value; + } return groupedResults; } const { body: result } = await esClient.search({ diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.test.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.test.ts index 4fbdf72a7429cf..7e26bc2ba6be67 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.test.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.test.ts @@ -24,7 +24,7 @@ describe("The Metric Threshold Alert's getElasticsearchMetricQuery", () => { }; describe('when passed no filterQuery', () => { - const searchBody = getElasticsearchMetricQuery(expressionParams, timeframe, groupBy); + const searchBody = getElasticsearchMetricQuery(expressionParams, timeframe, 100, groupBy); test('includes a range filter', () => { expect( searchBody.query.bool.filter.find((filter) => filter.hasOwnProperty('range')) @@ -47,6 +47,7 @@ describe("The Metric Threshold Alert's getElasticsearchMetricQuery", () => { const searchBody = getElasticsearchMetricQuery( expressionParams, timeframe, + 100, groupBy, filterQuery ); diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts index 56c9d5d14d0141..5f7d643ec22eb3 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts @@ -11,8 +11,6 @@ import { networkTraffic } from '../../../../../common/inventory_models/shared/me import { calculateDateHistogramOffset } from '../../../metrics/lib/calculate_date_histogram_offset'; import { createPercentileAggregation } from './create_percentile_aggregation'; -const COMPOSITE_RESULTS_PER_PAGE = 100; - const getParsedFilterQuery: (filterQuery: string | undefined) => Record | null = ( filterQuery ) => { @@ -23,6 +21,7 @@ const getParsedFilterQuery: (filterQuery: string | undefined) => Record { @@ -73,7 +72,7 @@ export const getElasticsearchMetricQuery = ( ? { groupings: { composite: { - size: COMPOSITE_RESULTS_PER_PAGE, + size: compositeSize, sources: Array.isArray(groupBy) ? groupBy.map((field, index) => ({ [`groupBy${index}`]: { diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts index b59ca5245db982..303947b0b93edc 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts @@ -747,6 +747,11 @@ describe('The metric threshold alert type', () => { }); const createMockStaticConfiguration = (sources: any) => ({ + alerting: { + metric_threshold: { + group_by_page_size: 100, + }, + }, inventory: { compositeSize: 2000, }, diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.ts index 371e75451c5792..8f6c359b150c12 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.ts @@ -118,6 +118,7 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) => sourceId || 'default' ); const config = source.configuration; + const compositeSize = libs.configuration.alerting.metric_threshold.group_by_page_size; const previousGroupBy = state.groupBy; const previousFilterQuery = state.filterQuery; @@ -135,7 +136,8 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) => services.scopedClusterClient.asCurrentUser, params as EvaluatedRuleParams, config, - prevGroups + prevGroups, + compositeSize ); // Because each alert result has the same group definitions, just grab the groups from the first one. @@ -248,7 +250,6 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) => }); } } - return { groups, groupBy: params.groupBy, filterQuery: params.filterQuery }; }); diff --git a/x-pack/plugins/infra/server/lib/infra_types.ts b/x-pack/plugins/infra/server/lib/infra_types.ts index 332a2e499977bb..bfac01d3138f4a 100644 --- a/x-pack/plugins/infra/server/lib/infra_types.ts +++ b/x-pack/plugins/infra/server/lib/infra_types.ts @@ -6,7 +6,7 @@ */ import { handleEsError } from '../../../../../src/plugins/es_ui_shared/server'; -import { InfraConfig } from '../plugin'; +import { InfraConfig } from '../types'; import { GetLogQueryFields } from '../services/log_queries/get_log_query_fields'; import { RulesServiceSetup } from '../services/rules'; import { KibanaFramework } from './adapters/framework/kibana_framework_adapter'; diff --git a/x-pack/plugins/infra/server/lib/sources/sources.test.ts b/x-pack/plugins/infra/server/lib/sources/sources.test.ts index 396d2c22a100f0..adcec1bf055ea8 100644 --- a/x-pack/plugins/infra/server/lib/sources/sources.test.ts +++ b/x-pack/plugins/infra/server/lib/sources/sources.test.ts @@ -109,6 +109,11 @@ describe('the InfraSources lib', () => { }); const createMockStaticConfiguration = (sources: any) => ({ + alerting: { + metric_threshold: { + group_by_page_size: 10000, + }, + }, enabled: true, inventory: { compositeSize: 2000, diff --git a/x-pack/plugins/infra/server/plugin.ts b/x-pack/plugins/infra/server/plugin.ts index 79998ae6d56909..32562e319e1e59 100644 --- a/x-pack/plugins/infra/server/plugin.ts +++ b/x-pack/plugins/infra/server/plugin.ts @@ -6,7 +6,7 @@ */ import { Server } from '@hapi/hapi'; -import { schema, TypeOf } from '@kbn/config-schema'; +import { schema } from '@kbn/config-schema'; import { i18n } from '@kbn/i18n'; import { Logger } from '@kbn/logging'; import { @@ -35,15 +35,20 @@ import { InfraBackendLibs, InfraDomainLibs } from './lib/infra_types'; import { infraSourceConfigurationSavedObjectType, InfraSources } from './lib/sources'; import { InfraSourceStatus } from './lib/source_status'; import { LogEntriesService } from './services/log_entries'; -import { InfraPluginRequestHandlerContext } from './types'; +import { InfraPluginRequestHandlerContext, InfraConfig } from './types'; import { UsageCollector } from './usage/usage_collector'; import { createGetLogQueryFields } from './services/log_queries/get_log_query_fields'; import { handleEsError } from '../../../../src/plugins/es_ui_shared/server'; import { RulesService } from './services/rules'; import { configDeprecations, getInfraDeprecationsFactory } from './deprecations'; -export const config: PluginConfigDescriptor = { +export const config: PluginConfigDescriptor = { schema: schema.object({ + alerting: schema.object({ + metric_threshold: schema.object({ + group_by_page_size: schema.number({ defaultValue: 10000 }), + }), + }), inventory: schema.object({ compositeSize: schema.number({ defaultValue: 2000 }), }), @@ -64,7 +69,7 @@ export const config: PluginConfigDescriptor = { deprecations: configDeprecations, }; -export type InfraConfig = TypeOf; +export type { InfraConfig }; export interface KbnServer extends Server { usage: any; diff --git a/x-pack/plugins/infra/server/types.ts b/x-pack/plugins/infra/server/types.ts index 5cae0158619460..9bfdc4784d26c2 100644 --- a/x-pack/plugins/infra/server/types.ts +++ b/x-pack/plugins/infra/server/types.ts @@ -31,3 +31,21 @@ export interface InfraPluginRequestHandlerContext extends RequestHandlerContext infra: InfraRequestHandlerContext; search: SearchRequestHandlerContext; } + +export interface InfraConfig { + alerting: { + metric_threshold: { + group_by_page_size: number; + }; + }; + inventory: { + compositeSize: number; + }; + sources?: { + default?: { + fields?: { + message?: string[]; + }; + }; + }; +} diff --git a/x-pack/test/api_integration/apis/metrics_ui/metric_threshold_alert.ts b/x-pack/test/api_integration/apis/metrics_ui/metric_threshold_alert.ts index 337d31a0a51b04..7e00044ef13659 100644 --- a/x-pack/test/api_integration/apis/metrics_ui/metric_threshold_alert.ts +++ b/x-pack/test/api_integration/apis/metrics_ui/metric_threshold_alert.ts @@ -100,7 +100,7 @@ export default function ({ getService }: FtrProviderContext) { }; const timeFrame = { end: DATES.ten_thousand_plus.max }; const kbnClient = convertToKibanaClient(esClient); - const results = await evaluateRule(kbnClient, params, config, [], timeFrame); + const results = await evaluateRule(kbnClient, params, config, [], 10000, timeFrame); expect(results).to.eql([ { '*': { @@ -142,7 +142,7 @@ export default function ({ getService }: FtrProviderContext) { }; const timeFrame = { end: DATES.ten_thousand_plus.max }; const kbnClient = convertToKibanaClient(esClient); - const results = await evaluateRule(kbnClient, params, config, [], timeFrame); + const results = await evaluateRule(kbnClient, params, config, [], 10000, timeFrame); expect(results).to.eql([ { web: { @@ -184,7 +184,14 @@ export default function ({ getService }: FtrProviderContext) { }; const timeFrame = { end: gauge.max }; const kbnClient = convertToKibanaClient(esClient); - const results = await evaluateRule(kbnClient, params, configuration, [], timeFrame); + const results = await evaluateRule( + kbnClient, + params, + configuration, + [], + 10000, + timeFrame + ); expect(results).to.eql([ { '*': { @@ -208,7 +215,14 @@ export default function ({ getService }: FtrProviderContext) { const params = { ...baseParams }; const timeFrame = { end: gauge.max }; const kbnClient = convertToKibanaClient(esClient); - const results = await evaluateRule(kbnClient, params, configuration, [], timeFrame); + const results = await evaluateRule( + kbnClient, + params, + configuration, + [], + 10000, + timeFrame + ); expect(results).to.eql([ { '*': { @@ -246,7 +260,14 @@ export default function ({ getService }: FtrProviderContext) { }; const timeFrame = { end: gauge.max }; const kbnClient = convertToKibanaClient(esClient); - const results = await evaluateRule(kbnClient, params, configuration, [], timeFrame); + const results = await evaluateRule( + kbnClient, + params, + configuration, + [], + 10000, + timeFrame + ); expect(results).to.eql([ { dev: { @@ -287,7 +308,14 @@ export default function ({ getService }: FtrProviderContext) { }; const timeFrame = { end: gauge.max }; const kbnClient = convertToKibanaClient(esClient); - const results = await evaluateRule(kbnClient, params, configuration, [], timeFrame); + const results = await evaluateRule( + kbnClient, + params, + configuration, + [], + 10000, + timeFrame + ); expect(results).to.eql([ { dev: { @@ -334,6 +362,7 @@ export default function ({ getService }: FtrProviderContext) { params, configuration, ['dev', 'prod'], + 10000, timeFrame ); expect(results).to.eql([ @@ -392,7 +421,14 @@ export default function ({ getService }: FtrProviderContext) { }; const timeFrame = { end: rate.max }; const kbnClient = convertToKibanaClient(esClient); - const results = await evaluateRule(kbnClient, params, configuration, [], timeFrame); + const results = await evaluateRule( + kbnClient, + params, + configuration, + [], + 10000, + timeFrame + ); expect(results).to.eql([ { '*': { @@ -433,7 +469,14 @@ export default function ({ getService }: FtrProviderContext) { }; const timeFrame = { end: rate.max }; const kbnClient = convertToKibanaClient(esClient); - const results = await evaluateRule(kbnClient, params, configuration, [], timeFrame); + const results = await evaluateRule( + kbnClient, + params, + configuration, + [], + 10000, + timeFrame + ); expect(results).to.eql([ { dev: { diff --git a/x-pack/test/api_integration/apis/metrics_ui/metrics_alerting.ts b/x-pack/test/api_integration/apis/metrics_ui/metrics_alerting.ts index d441c59b24a20f..b5a249229b1b38 100644 --- a/x-pack/test/api_integration/apis/metrics_ui/metrics_alerting.ts +++ b/x-pack/test/api_integration/apis/metrics_ui/metrics_alerting.ts @@ -37,7 +37,7 @@ export default function ({ getService }: FtrProviderContext) { start: moment().subtract(25, 'minutes').valueOf(), end: moment().valueOf(), }; - const searchBody = getElasticsearchMetricQuery(getSearchParams(aggType), timeframe); + const searchBody = getElasticsearchMetricQuery(getSearchParams(aggType), timeframe, 100); const result = await client.search({ index, // @ts-expect-error @elastic/elasticsearch AggregationsBucketsPath is not valid @@ -58,6 +58,7 @@ export default function ({ getService }: FtrProviderContext) { const searchBody = getElasticsearchMetricQuery( getSearchParams('avg'), timeframe, + 100, undefined, '{"bool":{"should":[{"match_phrase":{"agent.hostname":"foo"}}],"minimum_should_match":1}}' ); @@ -81,6 +82,7 @@ export default function ({ getService }: FtrProviderContext) { const searchBody = getElasticsearchMetricQuery( getSearchParams(aggType), timeframe, + 100, 'agent.id' ); const result = await client.search({ @@ -101,6 +103,7 @@ export default function ({ getService }: FtrProviderContext) { const searchBody = getElasticsearchMetricQuery( getSearchParams('avg'), timeframe, + 100, 'agent.id', '{"bool":{"should":[{"match_phrase":{"agent.hostname":"foo"}}],"minimum_should_match":1}}' );