Skip to content

Commit

Permalink
return reason for health metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
ersin-erdal committed Mar 30, 2023
1 parent 7840216 commit 2f3762f
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 50 deletions.

This file was deleted.

40 changes: 30 additions & 10 deletions x-pack/plugins/task_manager/server/lib/log_health_metrics.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,30 @@ describe('logHealthMetrics', () => {
const { calculateHealthStatus } = jest.requireMock('./calculate_health_status');

// We must change from OK to Warning
(calculateHealthStatus as jest.Mock<HealthStatus>).mockImplementation(() => HealthStatus.OK);
(
calculateHealthStatus as jest.Mock<{ status: HealthStatus; reason?: string }>
).mockImplementation(() => ({
status: HealthStatus.OK,
}));
logHealthMetrics(health, logger, config, true, docLinks);
(calculateHealthStatus as jest.Mock<HealthStatus>).mockImplementation(
() => HealthStatus.Warning
);
(
calculateHealthStatus as jest.Mock<{ status: HealthStatus; reason?: string }>
).mockImplementation(() => ({
status: HealthStatus.Warning,
}));
logHealthMetrics(health, logger, config, true, docLinks);
// We must change from OK to Error
(calculateHealthStatus as jest.Mock<HealthStatus>).mockImplementation(() => HealthStatus.OK);
(
calculateHealthStatus as jest.Mock<{ status: HealthStatus; reason?: string }>
).mockImplementation(() => ({
status: HealthStatus.OK,
}));
logHealthMetrics(health, logger, config, true, docLinks);
(calculateHealthStatus as jest.Mock<HealthStatus>).mockImplementation(() => HealthStatus.Error);
(
calculateHealthStatus as jest.Mock<{ status: HealthStatus; reason?: string }>
).mockImplementation(() => ({
status: HealthStatus.Error,
}));
logHealthMetrics(health, logger, config, true, docLinks);

const debugCalls = (logger as jest.Mocked<Logger>).debug.mock.calls;
Expand Down Expand Up @@ -175,9 +189,11 @@ describe('logHealthMetrics', () => {
});
const health = getMockMonitoredHealth();
const { calculateHealthStatus } = jest.requireMock('./calculate_health_status');
(calculateHealthStatus as jest.Mock<HealthStatus>).mockImplementation(
() => HealthStatus.Warning
);
(
calculateHealthStatus as jest.Mock<{ status: HealthStatus; reason?: string }>
).mockImplementation(() => ({
status: HealthStatus.Warning,
}));

logHealthMetrics(health, logger, config, true, docLinks);

Expand All @@ -201,7 +217,11 @@ describe('logHealthMetrics', () => {
});
const health = getMockMonitoredHealth();
const { calculateHealthStatus } = jest.requireMock('./calculate_health_status');
(calculateHealthStatus as jest.Mock<HealthStatus>).mockImplementation(() => HealthStatus.Error);
(
calculateHealthStatus as jest.Mock<{ status: HealthStatus; reason?: string }>
).mockImplementation(() => ({
status: HealthStatus.Error,
}));

logHealthMetrics(health, logger, config, true, docLinks);

Expand Down
9 changes: 3 additions & 6 deletions x-pack/plugins/task_manager/server/lib/log_health_metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,9 @@ export function logHealthMetrics(
capacity_estimation: undefined,
},
};
const { status: statusWithoutCapacity } = calculateHealthStatus(
healthWithoutCapacity,
config,
shouldRunTasks,
logger
);
const healthStatus = calculateHealthStatus(healthWithoutCapacity, config, shouldRunTasks, logger);

const statusWithoutCapacity = healthStatus?.status;
if (statusWithoutCapacity === HealthStatus.Warning) {
logLevel = LogLevel.Warn;
} else if (statusWithoutCapacity === HealthStatus.Error && !isEmpty(monitoredHealth.stats)) {
Expand Down
55 changes: 36 additions & 19 deletions x-pack/plugins/task_manager/server/routes/health.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,9 @@ import { mockHandlerArguments } from './_mock_handler_arguments';
import { sleep } from '../test_utils';
import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
import { usageCountersServiceMock } from '@kbn/usage-collection-plugin/server/usage_counters/usage_counters_service.mock';
import {
HealthStatus,
MonitoringStats,
RawMonitoringStats,
summarizeMonitoringStats,
} from '../monitoring';
import { MonitoringStats, RawMonitoringStats, summarizeMonitoringStats } from '../monitoring';
import { ServiceStatusLevels, Logger } from '@kbn/core/server';
import { configSchema, TaskManagerConfig } from '../config';
import { calculateHealthStatusMock } from '../lib/calculate_health_status.mock';
import { FillPoolResult } from '../lib/fill_pool';

jest.mock('../lib/log_health_metrics', () => ({
Expand Down Expand Up @@ -191,8 +185,6 @@ describe('healthRoute', () => {

it('logs the Task Manager stats at a fixed interval', async () => {
const router = httpServiceMock.createRouter();
const calculateHealthStatus = calculateHealthStatusMock.create();
calculateHealthStatus.mockImplementation(() => HealthStatus.OK);
const { logHealthMetrics } = jest.requireMock('../lib/log_health_metrics');

const mockStat = mockHealthStats();
Expand Down Expand Up @@ -253,8 +245,6 @@ describe('healthRoute', () => {

it(`logs at a warn level if the status is warning`, async () => {
const router = httpServiceMock.createRouter();
const calculateHealthStatus = calculateHealthStatusMock.create();
calculateHealthStatus.mockImplementation(() => HealthStatus.Warning);
const { logHealthMetrics } = jest.requireMock('../lib/log_health_metrics');

const warnRuntimeStat = mockHealthStats();
Expand All @@ -265,7 +255,7 @@ describe('healthRoute', () => {
const stats$ = new Subject<MonitoringStats>();

const id = uuidv4();
healthRoute({
const { serviceStatus$ } = healthRoute({
router,
monitoringStats$: stats$,
logger,
Expand All @@ -287,6 +277,8 @@ describe('healthRoute', () => {
docLinks,
});

const serviceStatus = getLatest(serviceStatus$);

stats$.next(warnRuntimeStat);
await sleep(1001);
stats$.next(warnConfigurationStat);
Expand All @@ -295,6 +287,12 @@ describe('healthRoute', () => {
await sleep(1001);
stats$.next(warnEphemeralStat);

expect(await serviceStatus).toMatchObject({
level: ServiceStatusLevels.degraded,
summary:
'Task Manager is unhealthy - Reason: / setting HealthStatus.Warning because assumedAverageRecurringRequiredThroughputPerMinutePerKibana (78.28472222222223) < capacityPerMinutePerKibana (200)',
});

expect(logHealthMetrics).toBeCalledTimes(4);
expect(logHealthMetrics.mock.calls[0][0]).toMatchObject({
id,
Expand Down Expand Up @@ -332,8 +330,6 @@ describe('healthRoute', () => {

it(`logs at an error level if the status is error`, async () => {
const router = httpServiceMock.createRouter();
const calculateHealthStatus = calculateHealthStatusMock.create();
calculateHealthStatus.mockImplementation(() => HealthStatus.Error);
const { logHealthMetrics } = jest.requireMock('../lib/log_health_metrics');

const errorRuntimeStat = mockHealthStats();
Expand All @@ -344,7 +340,7 @@ describe('healthRoute', () => {
const stats$ = new Subject<MonitoringStats>();

const id = uuidv4();
healthRoute({
const { serviceStatus$ } = healthRoute({
router,
monitoringStats$: stats$,
logger,
Expand All @@ -366,6 +362,8 @@ describe('healthRoute', () => {
docLinks,
});

const serviceStatus = getLatest(serviceStatus$);

stats$.next(errorRuntimeStat);
await sleep(1001);
stats$.next(errorConfigurationStat);
Expand All @@ -374,6 +372,12 @@ describe('healthRoute', () => {
await sleep(1001);
stats$.next(errorEphemeralStat);

expect(await serviceStatus).toMatchObject({
level: ServiceStatusLevels.degraded,
summary:
'Task Manager is unhealthy - Reason: / setting HealthStatus.Warning because assumedAverageRecurringRequiredThroughputPerMinutePerKibana (78.28472222222223) < capacityPerMinutePerKibana (200)',
});

expect(logHealthMetrics).toBeCalledTimes(4);
expect(logHealthMetrics.mock.calls[0][0]).toMatchObject({
id,
Expand Down Expand Up @@ -481,7 +485,8 @@ describe('healthRoute', () => {

expect(await serviceStatus).toMatchObject({
level: ServiceStatusLevels.degraded,
summary: 'Task Manager is unhealthy',
summary:
'Task Manager is unhealthy - Reason: setting HealthStatus.Error because of expired hot timestamps',
});
const warnCalls = (logger as jest.Mocked<Logger>).warn.mock.calls as string[][];
const warnMessage =
Expand All @@ -497,7 +502,7 @@ describe('healthRoute', () => {

const stats$ = new Subject<MonitoringStats>();

healthRoute({
const { serviceStatus$ } = healthRoute({
router,
monitoringStats$: stats$,
logger,
Expand All @@ -514,6 +519,8 @@ describe('healthRoute', () => {
docLinks,
});

const serviceStatus = getLatest(serviceStatus$);

await sleep(0);

const lastUpdateOfWorkload = new Date(Date.now() - 120000).toISOString();
Expand All @@ -533,6 +540,11 @@ describe('healthRoute', () => {

await sleep(2000);

expect(await serviceStatus).toMatchObject({
level: ServiceStatusLevels.degraded,
summary:
'Task Manager is unhealthy - Reason: setting HealthStatus.Error because of expired cold timestamps',
});
expect(await handler(context, req, res)).toMatchObject({
body: {
status: 'error',
Expand Down Expand Up @@ -572,7 +584,7 @@ describe('healthRoute', () => {
const router = httpServiceMock.createRouter();

const stats$ = new Subject<MonitoringStats>();
healthRoute({
const { serviceStatus$ } = healthRoute({
router,
monitoringStats$: stats$,
logger,
Expand All @@ -588,7 +600,7 @@ describe('healthRoute', () => {
shouldRunTasks: true,
docLinks,
});

const serviceStatus = getLatest(serviceStatus$);
await sleep(0);

// eslint-disable-next-line @typescript-eslint/naming-convention
Expand All @@ -611,6 +623,11 @@ describe('healthRoute', () => {

const [context, req, res] = mockHandlerArguments({}, {}, ['ok']);

expect(await serviceStatus).toMatchObject({
level: ServiceStatusLevels.degraded,
summary:
'Task Manager is unhealthy - Reason: setting HealthStatus.Error because of expired hot timestamps',
});
expect(await handler(context, req, res)).toMatchObject({
body: {
status: 'error',
Expand Down
4 changes: 3 additions & 1 deletion x-pack/plugins/task_manager/server/routes/health.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,9 @@ export function withServiceStatus(

const level =
status === HealthStatus.OK ? ServiceStatusLevels.available : ServiceStatusLevels.degraded;
const summary = LEVEL_SUMMARY[level.toString()] + reason ? ` - Reason: ${reason}` : '';

const defaultMessage = LEVEL_SUMMARY[level.toString()];
const summary = reason ? `${defaultMessage} - Reason: ${reason}` : defaultMessage;

return [
monitoredHealth,
Expand Down

0 comments on commit 2f3762f

Please sign in to comment.