Skip to content

Commit

Permalink
[Metrics UI] Fix No Data alerts on Metric Threshold with groupBy (ela…
Browse files Browse the repository at this point in the history
…stic#111465)

* [Metrics UI] Fix No Data alerts on Metric Threshold with groupBy

* Fix typecheck

* Add integration test for no data groupBy scenario

* Uncomment test files

* Uncomment test files

* Reset stored groups when groupBy parameter changes

* Test for groupBy arrays

* Fix initial No Data result when no groups are detected yet

* Fix linting error

* Fix detecting groups with doc count 0

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
  • Loading branch information
Zacqary and kibanamachine committed Sep 23, 2021
1 parent 39cd4c3 commit 36ec7f5
Show file tree
Hide file tree
Showing 8 changed files with 289 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ export const evaluateAlert = <Params extends EvaluatedAlertParams = EvaluatedAle
esClient: ElasticsearchClient,
params: Params,
config: InfraSource['configuration'],
prevGroups: string[],
timeframe?: { start?: number; end: number }
) => {
const { criteria, groupBy, filterQuery, shouldDropPartialBuckets } = params;
Expand Down Expand Up @@ -91,21 +92,53 @@ export const evaluateAlert = <Params extends EvaluatedAlertParams = EvaluatedAle
: [false];
};

return mapValues(currentValues, (points: any[] | typeof NaN | null) => {
if (isTooManyBucketsPreviewException(points)) throw points;
return {
...criterion,
metric: criterion.metric ?? DOCUMENT_COUNT_I18N,
currentValue: Array.isArray(points) ? last(points)?.value : NaN,
timestamp: Array.isArray(points) ? last(points)?.key : NaN,
shouldFire: pointsEvaluator(points, threshold, comparator),
shouldWarn: pointsEvaluator(points, warningThreshold, warningComparator),
isNoData: Array.isArray(points)
? points.map((point) => point?.value === null || point === null)
: [points === null],
isError: isNaN(Array.isArray(points) ? last(points)?.value : points),
};
});
// If any previous groups are no longer being reported, backfill them with null values
const currentGroups = Object.keys(currentValues);

const missingGroups = prevGroups.filter((g) => !currentGroups.includes(g));
if (currentGroups.length === 0 && missingGroups.length === 0) {
missingGroups.push(UNGROUPED_FACTORY_KEY);
}
const backfillTimestamp =
last(last(Object.values(currentValues)))?.key ?? new Date().toISOString();
const backfilledPrevGroups: Record<
string,
Array<{ key: string; value: number }>
> = missingGroups.reduce(
(result, group) => ({
...result,
[group]: [
{
key: backfillTimestamp,
value: criterion.aggType === Aggregators.COUNT ? 0 : null,
},
],
}),
{}
);
const currentValuesWithBackfilledPrevGroups = {
...currentValues,
...backfilledPrevGroups,
};

return mapValues(
currentValuesWithBackfilledPrevGroups,
(points: any[] | typeof NaN | null) => {
if (isTooManyBucketsPreviewException(points)) throw points;
return {
...criterion,
metric: criterion.metric ?? DOCUMENT_COUNT_I18N,
currentValue: Array.isArray(points) ? last(points)?.value : NaN,
timestamp: Array.isArray(points) ? last(points)?.key : NaN,
shouldFire: pointsEvaluator(points, threshold, comparator),
shouldWarn: pointsEvaluator(points, warningThreshold, warningComparator),
isNoData: Array.isArray(points)
? points.map((point) => point?.value === null || point === null)
: [points === null],
isError: isNaN(Array.isArray(points) ? last(points)?.value : points),
};
}
);
})
);
};
Expand All @@ -119,7 +152,7 @@ const getMetric: (
filterQuery: string | undefined,
timeframe?: { start?: number; end: number },
shouldDropPartialBuckets?: boolean
) => Promise<Record<string, number[]>> = async function (
) => Promise<Record<string, Array<{ key: string; value: number }>>> = async function (
esClient,
params,
index,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@ let persistAlertInstances = false; // eslint-disable-line prefer-const

type TestRuleState = Record<string, unknown> & {
aRuleStateKey: string;
groups: string[];
groupBy?: string | string[];
};

const initialRuleState: TestRuleState = {
aRuleStateKey: 'INITIAL_RULE_STATE_VALUE',
groups: [],
};

const mockOptions = {
Expand Down Expand Up @@ -90,6 +93,7 @@ const mockOptions = {

describe('The metric threshold alert type', () => {
describe('querying the entire infrastructure', () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = (comparator: Comparator, threshold: number[], sourceId: string = 'default') =>
executor({
Expand Down Expand Up @@ -157,20 +161,29 @@ describe('The metric threshold alert type', () => {
});

describe('querying with a groupBy parameter', () => {
const execute = (comparator: Comparator, threshold: number[], sourceId: string = 'default') =>
afterAll(() => clearInstances());
const execute = (
comparator: Comparator,
threshold: number[],
groupBy: string[] = ['something'],
metric?: string,
state?: any
) =>
executor({
...mockOptions,
services,
params: {
groupBy: 'something',
groupBy,
criteria: [
{
...baseNonCountCriterion,
comparator,
threshold,
metric: metric ?? baseNonCountCriterion.metric,
},
],
},
state: state ?? mockOptions.state.wrapped,
});
const instanceIdA = 'a';
const instanceIdB = 'b';
Expand All @@ -194,9 +207,35 @@ describe('The metric threshold alert type', () => {
expect(mostRecentAction(instanceIdA).action.group).toBe('a');
expect(mostRecentAction(instanceIdB).action.group).toBe('b');
});
test('reports previous groups and the groupBy parameter in its state', async () => {
const stateResult = await execute(Comparator.GT, [0.75]);
expect(stateResult.groups).toEqual(expect.arrayContaining(['a', 'b']));
expect(stateResult.groupBy).toEqual(['something']);
});
test('persists previous groups that go missing, until the groupBy param changes', async () => {
const stateResult1 = await execute(Comparator.GT, [0.75], ['something'], 'test.metric.2');
expect(stateResult1.groups).toEqual(expect.arrayContaining(['a', 'b', 'c']));
const stateResult2 = await execute(
Comparator.GT,
[0.75],
['something'],
'test.metric.1',
stateResult1
);
expect(stateResult2.groups).toEqual(expect.arrayContaining(['a', 'b', 'c']));
const stateResult3 = await execute(
Comparator.GT,
[0.75],
['something', 'something-else'],
'test.metric.1',
stateResult2
);
expect(stateResult3.groups).toEqual(expect.arrayContaining(['a', 'b']));
});
});

describe('querying with multiple criteria', () => {
afterAll(() => clearInstances());
const execute = (
comparator: Comparator,
thresholdA: number[],
Expand Down Expand Up @@ -257,6 +296,7 @@ describe('The metric threshold alert type', () => {
});
});
describe('querying with the count aggregator', () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = (comparator: Comparator, threshold: number[], sourceId: string = 'default') =>
executor({
Expand All @@ -279,8 +319,47 @@ describe('The metric threshold alert type', () => {
await execute(Comparator.LT, [0.5]);
expect(mostRecentAction(instanceID)).toBe(undefined);
});
describe('with a groupBy parameter', () => {
const executeGroupBy = (
comparator: Comparator,
threshold: number[],
sourceId: string = 'default',
state?: any
) =>
executor({
...mockOptions,
services,
params: {
sourceId,
groupBy: 'something',
criteria: [
{
...baseCountCriterion,
comparator,
threshold,
},
],
},
state: state ?? mockOptions.state.wrapped,
});
const instanceIdA = 'a';
const instanceIdB = 'b';

test('successfully detects and alerts on a document count of 0', async () => {
const resultState = await executeGroupBy(Comparator.LT_OR_EQ, [0]);
expect(mostRecentAction(instanceIdA)).toBe(undefined);
expect(mostRecentAction(instanceIdB)).toBe(undefined);
await executeGroupBy(Comparator.LT_OR_EQ, [0], 'empty-response', resultState);
expect(mostRecentAction(instanceIdA).id).toBe(FIRED_ACTIONS.id);
expect(mostRecentAction(instanceIdB).id).toBe(FIRED_ACTIONS.id);
await executeGroupBy(Comparator.LT_OR_EQ, [0]);
expect(mostRecentAction(instanceIdA)).toBe(undefined);
expect(mostRecentAction(instanceIdB)).toBe(undefined);
});
});
});
describe('querying with the p99 aggregator', () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = (comparator: Comparator, threshold: number[], sourceId: string = 'default') =>
executor({
Expand All @@ -306,6 +385,7 @@ describe('The metric threshold alert type', () => {
});
});
describe('querying with the p95 aggregator', () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = (comparator: Comparator, threshold: number[], sourceId: string = 'default') =>
executor({
Expand All @@ -332,6 +412,7 @@ describe('The metric threshold alert type', () => {
});
});
describe("querying a metric that hasn't reported data", () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = (alertOnNoData: boolean, sourceId: string = 'default') =>
executor({
Expand Down Expand Up @@ -360,7 +441,51 @@ describe('The metric threshold alert type', () => {
});
});

describe('querying a groupBy alert that starts reporting no data, and then later reports data', () => {
afterAll(() => clearInstances());
const instanceID = '*';
const instanceIdA = 'a';
const instanceIdB = 'b';
const execute = (metric: string, state?: any) =>
executor({
...mockOptions,
services,
params: {
groupBy: 'something',
sourceId: 'default',
criteria: [
{
...baseNonCountCriterion,
comparator: Comparator.GT,
threshold: [0],
metric,
},
],
alertOnNoData: true,
},
state: state ?? mockOptions.state.wrapped,
});
const resultState: any[] = [];
test('first sends a No Data alert with the * group, but then reports groups when data is available', async () => {
resultState.push(await execute('test.metric.3'));
expect(mostRecentAction(instanceID).id).toBe(FIRED_ACTIONS.id);
resultState.push(await execute('test.metric.3', resultState.pop()));
expect(mostRecentAction(instanceID).id).toBe(FIRED_ACTIONS.id);
resultState.push(await execute('test.metric.1', resultState.pop()));
expect(mostRecentAction(instanceID)).toBe(undefined);
expect(mostRecentAction(instanceIdA).id).toBe(FIRED_ACTIONS.id);
expect(mostRecentAction(instanceIdB).id).toBe(FIRED_ACTIONS.id);
});
test('sends No Data alerts for the previously detected groups when they stop reporting data, but not the * group', async () => {
await execute('test.metric.3', resultState.pop());
expect(mostRecentAction(instanceID)).toBe(undefined);
expect(mostRecentAction(instanceIdA).id).toBe(FIRED_ACTIONS.id);
expect(mostRecentAction(instanceIdB).id).toBe(FIRED_ACTIONS.id);
});
});

describe("querying a rate-aggregated metric that hasn't reported data", () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = (sourceId: string = 'default') =>
executor({
Expand Down Expand Up @@ -439,6 +564,7 @@ describe('The metric threshold alert type', () => {
*/

describe('querying a metric with a percentage metric', () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = () =>
executor({
Expand Down Expand Up @@ -497,7 +623,15 @@ const services: AlertServicesMock &
services.scopedClusterClient.asCurrentUser.search.mockImplementation((params?: any): any => {
const from = params?.body.query.bool.filter[0]?.range['@timestamp'].gte;
if (params.index === 'alternatebeat-*') return mocks.changedSourceIdResponse(from);
if (params.index === 'empty-response') return mocks.emptyMetricResponse;
const metric = params?.body.query.bool.filter[1]?.exists.field;
if (metric === 'test.metric.3') {
return elasticsearchClientMock.createSuccessTransportRequestPromise(
params?.body.aggs.aggregatedIntervals?.aggregations.aggregatedValueMax
? mocks.emptyRateResponse
: mocks.emptyMetricResponse
);
}
if (params?.body.aggs.groupings) {
if (params?.body.aggs.groupings.composite.after) {
return elasticsearchClientMock.createSuccessTransportRequestPromise(
Expand All @@ -517,12 +651,6 @@ services.scopedClusterClient.asCurrentUser.search.mockImplementation((params?: a
return elasticsearchClientMock.createSuccessTransportRequestPromise(
mocks.alternateMetricResponse()
);
} else if (metric === 'test.metric.3') {
return elasticsearchClientMock.createSuccessTransportRequestPromise(
params?.body.aggs.aggregatedIntervals.aggregations.aggregatedValueMax
? mocks.emptyRateResponse
: mocks.emptyMetricResponse
);
}
return elasticsearchClientMock.createSuccessTransportRequestPromise(mocks.basicMetricResponse());
});
Expand All @@ -534,6 +662,13 @@ services.savedObjectsClient.get.mockImplementation(async (type: string, sourceId
type,
references: [],
};
if (sourceId === 'empty-response')
return {
id: 'empty',
attributes: { metricAlias: 'empty-response' },
type,
references: [],
};
return { id: 'default', attributes: { metricAlias: 'metricbeat-*' }, type, references: [] };
});

Expand Down Expand Up @@ -561,7 +696,13 @@ services.alertInstanceFactory.mockImplementation((instanceID: string) => {
});

function mostRecentAction(id: string) {
return alertInstances.get(id)!.actionQueue.pop();
const instance = alertInstances.get(id);
if (!instance) return undefined;
return instance.actionQueue.pop();
}

function clearInstances() {
alertInstances.clear();
}

const baseNonCountCriterion: Pick<
Expand Down
Loading

0 comments on commit 36ec7f5

Please sign in to comment.