Skip to content

Commit

Permalink
[Alerting] Handle errors with scheduled task document during rule dis…
Browse files Browse the repository at this point in the history
…able (#118618) (#119084)

* Catching errors during rule disable

* Adding functional test

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>

Co-authored-by: ymao1 <ying.mao@elastic.co>
  • Loading branch information
kibanamachine and ymao1 authored Nov 18, 2021
1 parent 7e70cb8 commit 9770e80
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 69 deletions.
81 changes: 44 additions & 37 deletions x-pack/plugins/alerting/server/rules_client/rules_client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1192,44 +1192,51 @@ export class RulesClient {
}

if (this.eventLogger && attributes.scheduledTaskId) {
const { state } = taskInstanceToAlertTaskInstance(
await this.taskManager.get(attributes.scheduledTaskId),
attributes as unknown as SanitizedAlert
);
try {
const { state } = taskInstanceToAlertTaskInstance(
await this.taskManager.get(attributes.scheduledTaskId),
attributes as unknown as SanitizedAlert
);

const recoveredAlertInstances = mapValues<Record<string, RawAlertInstance>, AlertInstance>(
state.alertInstances ?? {},
(rawAlertInstance) => new AlertInstance(rawAlertInstance)
);
const recoveredAlertInstanceIds = Object.keys(recoveredAlertInstances);

for (const instanceId of recoveredAlertInstanceIds) {
const { group: actionGroup, subgroup: actionSubgroup } =
recoveredAlertInstances[instanceId].getLastScheduledActions() ?? {};
const instanceState = recoveredAlertInstances[instanceId].getState();
const message = `instance '${instanceId}' has recovered due to the rule was disabled`;

const event = createAlertEventLogRecordObject({
ruleId: id,
ruleName: attributes.name,
ruleType: this.ruleTypeRegistry.get(attributes.alertTypeId),
instanceId,
action: EVENT_LOG_ACTIONS.recoveredInstance,
message,
state: instanceState,
group: actionGroup,
subgroup: actionSubgroup,
namespace: this.namespace,
savedObjects: [
{
id,
type: 'alert',
typeId: attributes.alertTypeId,
relation: SAVED_OBJECT_REL_PRIMARY,
},
],
});
this.eventLogger.logEvent(event);
const recoveredAlertInstances = mapValues<Record<string, RawAlertInstance>, AlertInstance>(
state.alertInstances ?? {},
(rawAlertInstance) => new AlertInstance(rawAlertInstance)
);
const recoveredAlertInstanceIds = Object.keys(recoveredAlertInstances);

for (const instanceId of recoveredAlertInstanceIds) {
const { group: actionGroup, subgroup: actionSubgroup } =
recoveredAlertInstances[instanceId].getLastScheduledActions() ?? {};
const instanceState = recoveredAlertInstances[instanceId].getState();
const message = `instance '${instanceId}' has recovered due to the rule was disabled`;

const event = createAlertEventLogRecordObject({
ruleId: id,
ruleName: attributes.name,
ruleType: this.ruleTypeRegistry.get(attributes.alertTypeId),
instanceId,
action: EVENT_LOG_ACTIONS.recoveredInstance,
message,
state: instanceState,
group: actionGroup,
subgroup: actionSubgroup,
namespace: this.namespace,
savedObjects: [
{
id,
type: 'alert',
typeId: attributes.alertTypeId,
relation: SAVED_OBJECT_REL_PRIMARY,
},
],
});
this.eventLogger.logEvent(event);
}
} catch (error) {
// this should not block the rest of the disable process
this.logger.warn(
`rulesClient.disable('${id}') - Could not write recovery events - ${error.message}`
);
}
}

Expand Down
59 changes: 59 additions & 0 deletions x-pack/plugins/alerting/server/rules_client/tests/disable.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,65 @@ describe('disable()', () => {
});
});

test('disables the rule even if unable to retrieve task manager doc to generate recovery event log events', async () => {
unsecuredSavedObjectsClient.create.mockResolvedValueOnce({
id: '1',
type: 'api_key_pending_invalidation',
attributes: {
apiKeyId: '123',
createdAt: '2019-02-12T21:01:22.479Z',
},
references: [],
});
taskManager.get.mockRejectedValueOnce(new Error('Fail'));
await rulesClient.disable({ id: '1' });
expect(unsecuredSavedObjectsClient.get).not.toHaveBeenCalled();
expect(encryptedSavedObjects.getDecryptedAsInternalUser).toHaveBeenCalledWith('alert', '1', {
namespace: 'default',
});
expect(unsecuredSavedObjectsClient.update).toHaveBeenCalledWith(
'alert',
'1',
{
consumer: 'myApp',
schedule: { interval: '10s' },
alertTypeId: 'myType',
enabled: false,
meta: {
versionApiKeyLastmodified: kibanaVersion,
},
scheduledTaskId: null,
apiKey: null,
apiKeyOwner: null,
updatedAt: '2019-02-12T21:01:22.479Z',
updatedBy: 'elastic',
actions: [
{
group: 'default',
id: '1',
actionTypeId: '1',
actionRef: '1',
params: {
foo: true,
},
},
],
},
{
version: '123',
}
);
expect(taskManager.removeIfExists).toHaveBeenCalledWith('task-123');
expect(
(unsecuredSavedObjectsClient.create.mock.calls[0][1] as InvalidatePendingApiKey).apiKeyId
).toBe('123');

expect(eventLogger.logEvent).toHaveBeenCalledTimes(0);
expect(rulesClientParams.logger.warn).toHaveBeenCalledWith(
`rulesClient.disable('1') - Could not write recovery events - Fail`
);
});

test('falls back when getDecryptedAsInternalUser throws an error', async () => {
encryptedSavedObjects.getDecryptedAsInternalUser.mockRejectedValueOnce(new Error('Fail'));
unsecuredSavedObjectsClient.create.mockResolvedValueOnce({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,25 @@ import expect from '@kbn/expect';
import { Spaces } from '../../scenarios';
import { FtrProviderContext } from '../../../common/ftr_provider_context';
import {
AlertUtils,
AlertUtils as RuleUtils,
checkAAD,
getUrlPrefix,
getTestAlertData,
getTestAlertData as getTestRuleData,
ObjectRemover,
getEventLog,
} from '../../../common/lib';
import { validateEvent } from './event_log';

// eslint-disable-next-line import/no-default-export
export default function createDisableAlertTests({ getService }: FtrProviderContext) {
export default function createDisableRuleTests({ getService }: FtrProviderContext) {
const es = getService('es');
const supertestWithoutAuth = getService('supertestWithoutAuth');
const retry = getService('retry');
const supertest = getService('supertest');

describe('disable', () => {
const objectRemover = new ObjectRemover(supertestWithoutAuth);
const alertUtils = new AlertUtils({ space: Spaces.space1, supertestWithoutAuth });
const ruleUtils = new RuleUtils({ space: Spaces.space1, supertestWithoutAuth });

after(() => objectRemover.removeAll());

Expand All @@ -38,18 +38,18 @@ export default function createDisableAlertTests({ getService }: FtrProviderConte
});
}

it('should handle disable alert request appropriately', async () => {
const { body: createdAlert } = await supertestWithoutAuth
it('should handle disable rule request appropriately', async () => {
const { body: createdRule } = await supertestWithoutAuth
.post(`${getUrlPrefix(Spaces.space1.id)}/api/alerting/rule`)
.set('kbn-xsrf', 'foo')
.send(getTestAlertData({ enabled: true }))
.send(getTestRuleData({ enabled: true }))
.expect(200);
objectRemover.add(Spaces.space1.id, createdAlert.id, 'rule', 'alerting');
objectRemover.add(Spaces.space1.id, createdRule.id, 'rule', 'alerting');

await alertUtils.disable(createdAlert.id);
await ruleUtils.disable(createdRule.id);

try {
await getScheduledTask(createdAlert.scheduledTaskId);
await getScheduledTask(createdRule.scheduled_task_id);
throw new Error('Should have removed scheduled task');
} catch (e) {
expect(e.meta.statusCode).to.eql(404);
Expand All @@ -60,27 +60,27 @@ export default function createDisableAlertTests({ getService }: FtrProviderConte
supertest: supertestWithoutAuth,
spaceId: Spaces.space1.id,
type: 'alert',
id: createdAlert.id,
id: createdRule.id,
});
});

it(`shouldn't disable alert from another space`, async () => {
const { body: createdAlert } = await supertestWithoutAuth
it(`shouldn't disable rule from another space`, async () => {
const { body: createdRule } = await supertestWithoutAuth
.post(`${getUrlPrefix(Spaces.other.id)}/api/alerting/rule`)
.set('kbn-xsrf', 'foo')
.send(getTestAlertData({ enabled: true }))
.send(getTestRuleData({ enabled: true }))
.expect(200);
objectRemover.add(Spaces.other.id, createdAlert.id, 'rule', 'alerting');
objectRemover.add(Spaces.other.id, createdRule.id, 'rule', 'alerting');

await alertUtils.getDisableRequest(createdAlert.id).expect(404, {
await ruleUtils.getDisableRequest(createdRule.id).expect(404, {
statusCode: 404,
error: 'Not Found',
message: `Saved object [alert/${createdAlert.id}] not found`,
message: `Saved object [alert/${createdRule.id}] not found`,
});
});

it('should create recovered-instance events for all alert instances', async () => {
const { body: createdAlert } = await supertest
it('should create recovered-instance events for all alerts', async () => {
const { body: createdRule } = await supertest
.post(`${getUrlPrefix(Spaces.space1.id)}/api/alerting/rule`)
.set('kbn-xsrf', 'foo')
.send({
Expand All @@ -96,21 +96,21 @@ export default function createDisableAlertTests({ getService }: FtrProviderConte
notify_when: 'onThrottleInterval',
})
.expect(200);
objectRemover.add(Spaces.space1.id, createdAlert.id, 'rule', 'alerting');
objectRemover.add(Spaces.space1.id, createdRule.id, 'rule', 'alerting');

// wait for alert to actually execute
// wait for rule to actually execute
await retry.try(async () => {
const response = await supertest.get(
`${getUrlPrefix(Spaces.space1.id)}/internal/alerting/rule/${createdAlert.id}/state`
`${getUrlPrefix(Spaces.space1.id)}/internal/alerting/rule/${createdRule.id}/state`
);

expect(response.status).to.eql(200);
expect(response.body).to.key('alerts', 'rule_type_state', 'previous_started_at');
expect(response.body.rule_type_state.runCount).to.greaterThan(1);
});

await alertUtils.getDisableRequest(createdAlert.id);
const ruleId = createdAlert.id;
await ruleUtils.getDisableRequest(createdRule.id);
const ruleId = createdRule.id;

// wait for the events we're expecting
const events = await retry.try(async () => {
Expand Down Expand Up @@ -140,30 +140,54 @@ export default function createDisableAlertTests({ getService }: FtrProviderConte
shouldHaveTask: false,
rule: {
id: ruleId,
category: createdAlert.rule_type_id,
category: createdRule.rule_type_id,
license: 'basic',
ruleset: 'alertsFixture',
name: 'abc',
},
});
});

it('should disable rule even if associated task manager document is missing', async () => {
const { body: createdRule } = await supertestWithoutAuth
.post(`${getUrlPrefix(Spaces.space1.id)}/api/alerting/rule`)
.set('kbn-xsrf', 'foo')
.send(getTestRuleData({ enabled: true }))
.expect(200);
objectRemover.add(Spaces.space1.id, createdRule.id, 'rule', 'alerting');

// manually remove scheduled task
await es.delete({
id: `task:${createdRule.scheduled_task_id}`,
index: '.kibana_task_manager',
});
await ruleUtils.disable(createdRule.id);

// Ensure AAD isn't broken
await checkAAD({
supertest: supertestWithoutAuth,
spaceId: Spaces.space1.id,
type: 'alert',
id: createdRule.id,
});
});

describe('legacy', () => {
it('should handle disable alert request appropriately', async () => {
const { body: createdAlert } = await supertestWithoutAuth
it('should handle disable rule request appropriately', async () => {
const { body: createdRule } = await supertestWithoutAuth
.post(`${getUrlPrefix(Spaces.space1.id)}/api/alerting/rule`)
.set('kbn-xsrf', 'foo')
.send(getTestAlertData({ enabled: true }))
.send(getTestRuleData({ enabled: true }))
.expect(200);
objectRemover.add(Spaces.space1.id, createdAlert.id, 'rule', 'alerting');
objectRemover.add(Spaces.space1.id, createdRule.id, 'rule', 'alerting');

await supertestWithoutAuth
.post(`${getUrlPrefix(Spaces.space1.id)}/api/alerts/alert/${createdAlert.id}/_disable`)
.post(`${getUrlPrefix(Spaces.space1.id)}/api/alerts/alert/${createdRule.id}/_disable`)
.set('kbn-xsrf', 'foo')
.expect(204);

try {
await getScheduledTask(createdAlert.scheduledTaskId);
await getScheduledTask(createdRule.scheduled_task_id);
throw new Error('Should have removed scheduled task');
} catch (e) {
expect(e.meta.statusCode).to.eql(404);
Expand All @@ -174,7 +198,7 @@ export default function createDisableAlertTests({ getService }: FtrProviderConte
supertest: supertestWithoutAuth,
spaceId: Spaces.space1.id,
type: 'alert',
id: createdAlert.id,
id: createdRule.id,
});
});
});
Expand Down

0 comments on commit 9770e80

Please sign in to comment.