Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Metrics UI] Fix No Data alerts on Metric Threshold with groupBy #111465

Merged
merged 17 commits into from
Sep 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ export const evaluateAlert = <Params extends EvaluatedAlertParams = EvaluatedAle
esClient: ElasticsearchClient,
params: Params,
config: InfraSource['configuration'],
prevGroups: string[],
timeframe?: { start?: number; end: number }
) => {
const { criteria, groupBy, filterQuery, shouldDropPartialBuckets } = params;
Expand Down Expand Up @@ -91,21 +92,53 @@ export const evaluateAlert = <Params extends EvaluatedAlertParams = EvaluatedAle
: [false];
};

return mapValues(currentValues, (points: any[] | typeof NaN | null) => {
if (isTooManyBucketsPreviewException(points)) throw points;
return {
...criterion,
metric: criterion.metric ?? DOCUMENT_COUNT_I18N,
currentValue: Array.isArray(points) ? last(points)?.value : NaN,
timestamp: Array.isArray(points) ? last(points)?.key : NaN,
shouldFire: pointsEvaluator(points, threshold, comparator),
shouldWarn: pointsEvaluator(points, warningThreshold, warningComparator),
isNoData: Array.isArray(points)
? points.map((point) => point?.value === null || point === null)
: [points === null],
isError: isNaN(Array.isArray(points) ? last(points)?.value : points),
};
});
// If any previous groups are no longer being reported, backfill them with null values
const currentGroups = Object.keys(currentValues);

const missingGroups = prevGroups.filter((g) => !currentGroups.includes(g));
if (currentGroups.length === 0 && missingGroups.length === 0) {
missingGroups.push(UNGROUPED_FACTORY_KEY);
}
const backfillTimestamp =
last(last(Object.values(currentValues)))?.key ?? new Date().toISOString();
const backfilledPrevGroups: Record<
string,
Array<{ key: string; value: number }>
> = missingGroups.reduce(
(result, group) => ({
...result,
[group]: [
{
key: backfillTimestamp,
value: criterion.aggType === Aggregators.COUNT ? 0 : null,
},
],
}),
{}
);
const currentValuesWithBackfilledPrevGroups = {
...currentValues,
...backfilledPrevGroups,
};

return mapValues(
currentValuesWithBackfilledPrevGroups,
(points: any[] | typeof NaN | null) => {
if (isTooManyBucketsPreviewException(points)) throw points;
return {
...criterion,
metric: criterion.metric ?? DOCUMENT_COUNT_I18N,
currentValue: Array.isArray(points) ? last(points)?.value : NaN,
timestamp: Array.isArray(points) ? last(points)?.key : NaN,
shouldFire: pointsEvaluator(points, threshold, comparator),
shouldWarn: pointsEvaluator(points, warningThreshold, warningComparator),
isNoData: Array.isArray(points)
? points.map((point) => point?.value === null || point === null)
: [points === null],
isError: isNaN(Array.isArray(points) ? last(points)?.value : points),
};
}
);
})
);
};
Expand All @@ -119,7 +152,7 @@ const getMetric: (
filterQuery: string | undefined,
timeframe?: { start?: number; end: number },
shouldDropPartialBuckets?: boolean
) => Promise<Record<string, number[]>> = async function (
) => Promise<Record<string, Array<{ key: string; value: number }>>> = async function (
esClient,
params,
index,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@ let persistAlertInstances = false; // eslint-disable-line prefer-const

type TestRuleState = Record<string, unknown> & {
aRuleStateKey: string;
groups: string[];
groupBy?: string | string[];
};

const initialRuleState: TestRuleState = {
aRuleStateKey: 'INITIAL_RULE_STATE_VALUE',
groups: [],
};

const mockOptions = {
Expand Down Expand Up @@ -90,6 +93,7 @@ const mockOptions = {

describe('The metric threshold alert type', () => {
describe('querying the entire infrastructure', () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = (comparator: Comparator, threshold: number[], sourceId: string = 'default') =>
executor({
Expand Down Expand Up @@ -157,20 +161,29 @@ describe('The metric threshold alert type', () => {
});

describe('querying with a groupBy parameter', () => {
const execute = (comparator: Comparator, threshold: number[], sourceId: string = 'default') =>
afterAll(() => clearInstances());
const execute = (
comparator: Comparator,
threshold: number[],
groupBy: string[] = ['something'],
metric?: string,
state?: any
) =>
executor({
...mockOptions,
services,
params: {
groupBy: 'something',
groupBy,
criteria: [
{
...baseNonCountCriterion,
comparator,
threshold,
metric: metric ?? baseNonCountCriterion.metric,
},
],
},
state: state ?? mockOptions.state.wrapped,
});
const instanceIdA = 'a';
const instanceIdB = 'b';
Expand All @@ -194,9 +207,35 @@ describe('The metric threshold alert type', () => {
expect(mostRecentAction(instanceIdA).action.group).toBe('a');
expect(mostRecentAction(instanceIdB).action.group).toBe('b');
});
test('reports previous groups and the groupBy parameter in its state', async () => {
const stateResult = await execute(Comparator.GT, [0.75]);
expect(stateResult.groups).toEqual(expect.arrayContaining(['a', 'b']));
expect(stateResult.groupBy).toEqual(['something']);
});
test('persists previous groups that go missing, until the groupBy param changes', async () => {
const stateResult1 = await execute(Comparator.GT, [0.75], ['something'], 'test.metric.2');
expect(stateResult1.groups).toEqual(expect.arrayContaining(['a', 'b', 'c']));
const stateResult2 = await execute(
Comparator.GT,
[0.75],
['something'],
'test.metric.1',
stateResult1
);
expect(stateResult2.groups).toEqual(expect.arrayContaining(['a', 'b', 'c']));
const stateResult3 = await execute(
Comparator.GT,
[0.75],
['something', 'something-else'],
'test.metric.1',
stateResult2
);
expect(stateResult3.groups).toEqual(expect.arrayContaining(['a', 'b']));
});
});

describe('querying with multiple criteria', () => {
afterAll(() => clearInstances());
const execute = (
comparator: Comparator,
thresholdA: number[],
Expand Down Expand Up @@ -257,6 +296,7 @@ describe('The metric threshold alert type', () => {
});
});
describe('querying with the count aggregator', () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = (comparator: Comparator, threshold: number[], sourceId: string = 'default') =>
executor({
Expand All @@ -279,8 +319,47 @@ describe('The metric threshold alert type', () => {
await execute(Comparator.LT, [0.5]);
expect(mostRecentAction(instanceID)).toBe(undefined);
});
describe('with a groupBy parameter', () => {
const executeGroupBy = (
comparator: Comparator,
threshold: number[],
sourceId: string = 'default',
state?: any
) =>
executor({
...mockOptions,
services,
params: {
sourceId,
groupBy: 'something',
criteria: [
{
...baseCountCriterion,
comparator,
threshold,
},
],
},
state: state ?? mockOptions.state.wrapped,
});
const instanceIdA = 'a';
const instanceIdB = 'b';

test('successfully detects and alerts on a document count of 0', async () => {
const resultState = await executeGroupBy(Comparator.LT_OR_EQ, [0]);
expect(mostRecentAction(instanceIdA)).toBe(undefined);
expect(mostRecentAction(instanceIdB)).toBe(undefined);
await executeGroupBy(Comparator.LT_OR_EQ, [0], 'empty-response', resultState);
expect(mostRecentAction(instanceIdA).id).toBe(FIRED_ACTIONS.id);
expect(mostRecentAction(instanceIdB).id).toBe(FIRED_ACTIONS.id);
await executeGroupBy(Comparator.LT_OR_EQ, [0]);
expect(mostRecentAction(instanceIdA)).toBe(undefined);
expect(mostRecentAction(instanceIdB)).toBe(undefined);
});
});
});
describe('querying with the p99 aggregator', () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = (comparator: Comparator, threshold: number[], sourceId: string = 'default') =>
executor({
Expand All @@ -306,6 +385,7 @@ describe('The metric threshold alert type', () => {
});
});
describe('querying with the p95 aggregator', () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = (comparator: Comparator, threshold: number[], sourceId: string = 'default') =>
executor({
Expand All @@ -332,6 +412,7 @@ describe('The metric threshold alert type', () => {
});
});
describe("querying a metric that hasn't reported data", () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = (alertOnNoData: boolean, sourceId: string = 'default') =>
executor({
Expand Down Expand Up @@ -360,7 +441,51 @@ describe('The metric threshold alert type', () => {
});
});

describe('querying a groupBy alert that starts reporting no data, and then later reports data', () => {
afterAll(() => clearInstances());
const instanceID = '*';
const instanceIdA = 'a';
const instanceIdB = 'b';
const execute = (metric: string, state?: any) =>
executor({
...mockOptions,
services,
params: {
groupBy: 'something',
sourceId: 'default',
criteria: [
{
...baseNonCountCriterion,
comparator: Comparator.GT,
threshold: [0],
metric,
},
],
alertOnNoData: true,
},
state: state ?? mockOptions.state.wrapped,
});
const resultState: any[] = [];
test('first sends a No Data alert with the * group, but then reports groups when data is available', async () => {
resultState.push(await execute('test.metric.3'));
expect(mostRecentAction(instanceID).id).toBe(FIRED_ACTIONS.id);
resultState.push(await execute('test.metric.3', resultState.pop()));
expect(mostRecentAction(instanceID).id).toBe(FIRED_ACTIONS.id);
resultState.push(await execute('test.metric.1', resultState.pop()));
expect(mostRecentAction(instanceID)).toBe(undefined);
expect(mostRecentAction(instanceIdA).id).toBe(FIRED_ACTIONS.id);
expect(mostRecentAction(instanceIdB).id).toBe(FIRED_ACTIONS.id);
});
test('sends No Data alerts for the previously detected groups when they stop reporting data, but not the * group', async () => {
await execute('test.metric.3', resultState.pop());
expect(mostRecentAction(instanceID)).toBe(undefined);
expect(mostRecentAction(instanceIdA).id).toBe(FIRED_ACTIONS.id);
expect(mostRecentAction(instanceIdB).id).toBe(FIRED_ACTIONS.id);
});
});

describe("querying a rate-aggregated metric that hasn't reported data", () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = (sourceId: string = 'default') =>
executor({
Expand Down Expand Up @@ -439,6 +564,7 @@ describe('The metric threshold alert type', () => {
*/

describe('querying a metric with a percentage metric', () => {
afterAll(() => clearInstances());
const instanceID = '*';
const execute = () =>
executor({
Expand Down Expand Up @@ -497,7 +623,15 @@ const services: AlertServicesMock &
services.scopedClusterClient.asCurrentUser.search.mockImplementation((params?: any): any => {
const from = params?.body.query.bool.filter[0]?.range['@timestamp'].gte;
if (params.index === 'alternatebeat-*') return mocks.changedSourceIdResponse(from);
if (params.index === 'empty-response') return mocks.emptyMetricResponse;
const metric = params?.body.query.bool.filter[1]?.exists.field;
if (metric === 'test.metric.3') {
return elasticsearchClientMock.createSuccessTransportRequestPromise(
params?.body.aggs.aggregatedIntervals?.aggregations.aggregatedValueMax
? mocks.emptyRateResponse
: mocks.emptyMetricResponse
);
}
if (params?.body.aggs.groupings) {
if (params?.body.aggs.groupings.composite.after) {
return elasticsearchClientMock.createSuccessTransportRequestPromise(
Expand All @@ -517,12 +651,6 @@ services.scopedClusterClient.asCurrentUser.search.mockImplementation((params?: a
return elasticsearchClientMock.createSuccessTransportRequestPromise(
mocks.alternateMetricResponse()
);
} else if (metric === 'test.metric.3') {
return elasticsearchClientMock.createSuccessTransportRequestPromise(
params?.body.aggs.aggregatedIntervals.aggregations.aggregatedValueMax
? mocks.emptyRateResponse
: mocks.emptyMetricResponse
);
}
return elasticsearchClientMock.createSuccessTransportRequestPromise(mocks.basicMetricResponse());
});
Expand All @@ -534,6 +662,13 @@ services.savedObjectsClient.get.mockImplementation(async (type: string, sourceId
type,
references: [],
};
if (sourceId === 'empty-response')
return {
id: 'empty',
attributes: { metricAlias: 'empty-response' },
type,
references: [],
};
return { id: 'default', attributes: { metricAlias: 'metricbeat-*' }, type, references: [] };
});

Expand Down Expand Up @@ -561,7 +696,13 @@ services.alertInstanceFactory.mockImplementation((instanceID: string) => {
});

function mostRecentAction(id: string) {
return alertInstances.get(id)!.actionQueue.pop();
const instance = alertInstances.get(id);
if (!instance) return undefined;
return instance.actionQueue.pop();
}

function clearInstances() {
alertInstances.clear();
}

const baseNonCountCriterion: Pick<
Expand Down
Loading