diff --git a/packages/kbn-legacy-logging/src/get_logging_config.ts b/packages/kbn-legacy-logging/src/get_logging_config.ts
index 5a28ab1271538a..280b9f815c78cb 100644
--- a/packages/kbn-legacy-logging/src/get_logging_config.ts
+++ b/packages/kbn-legacy-logging/src/get_logging_config.ts
@@ -28,7 +28,10 @@ export function getLoggingConfiguration(config: LegacyLoggingConfig, opsInterval
   } else if (config.verbose) {
     _.defaults(events, {
       log: '*',
-      ops: '*',
+      // To avoid duplicate logs, we explicitly disable this in verbose
+      // mode as it is already provided by the new logging config under
+      // the `metrics.ops` context.
+      ops: '!',
       request: '*',
       response: '*',
       error: '*',
diff --git a/src/core/README.md b/src/core/README.md
index e195bf30c054c8..c73c6aa56bfd0d 100644
--- a/src/core/README.md
+++ b/src/core/README.md
@@ -28,8 +28,10 @@ Even though the new validation system provided by the `core` is also based on Jo
 rules tailored to our needs (e.g. `byteSize`, `duration` etc.). That means that config values that were previously accepted
 by the "legacy" Kibana may be rejected by the `core` now.
-Even though `core` has its own logging system it doesn't output log records directly (e.g. to file or terminal), but instead
-forward them to the "legacy" Kibana so that they look the same as the rest of the log records throughout Kibana.
+### Logging
+`core` has its own [logging system](./server/logging/README.md) and will output log records directly (e.g. to file or terminal) when configured. When no
+specific configuration is provided, logs are forwarded to the "legacy" Kibana so that they look the same as the rest of the
+log records throughout Kibana.
 
 ## Core API Review
 
 To provide a stable API for plugin developers, it is important that the Core Public and Server API's are stable and
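With the change above, ops data is emitted by the new logging system under the `metrics.ops` context rather than by the legacy `logging.events.ops` setting. A minimal, hypothetical `kibana.yml` sketch for enabling it (this assumes a `console` appender is defined as in the examples in `src/core/server/logging/README.md`, shown further down in this patch):

```yaml
# Sketch only: emit debug records for the ops metrics context through the
# new logging system. The `console` appender is assumed to be configured
# as described in src/core/server/logging/README.md.
logging:
  loggers:
    - context: metrics.ops
      level: debug
      appenders: [console]
```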
diff --git a/src/core/server/config/deprecation/core_deprecations.test.ts b/src/core/server/config/deprecation/core_deprecations.test.ts
index a7c6a638265239..a791362d9166f1 100644
--- a/src/core/server/config/deprecation/core_deprecations.test.ts
+++ b/src/core/server/config/deprecation/core_deprecations.test.ts
@@ -236,4 +236,24 @@ describe('core deprecations', () => {
       ).toEqual([`worker-src blob:`]);
     });
   });
+
+  describe('logging.events.ops', () => {
+    it('warns when ops events are used', () => {
+      const { messages } = applyCoreDeprecations({
+        logging: { events: { ops: '*' } },
+      });
+      expect(messages).toMatchInlineSnapshot(`
+        Array [
+          "\\"logging.events.ops\\" has been deprecated and will be removed in 8.0. To access ops data moving forward, please enable debug logs for the \\"metrics.ops\\" context in your logging configuration.",
+        ]
+      `);
+    });
+
+    it('does not warn when other events are configured', () => {
+      const { messages } = applyCoreDeprecations({
+        logging: { events: { log: '*' } },
+      });
+      expect(messages).toEqual([]);
+    });
+  });
 });
diff --git a/src/core/server/config/deprecation/core_deprecations.ts b/src/core/server/config/deprecation/core_deprecations.ts
index 6f6e6c3e0522e7..23a3518cd8eb68 100644
--- a/src/core/server/config/deprecation/core_deprecations.ts
+++ b/src/core/server/config/deprecation/core_deprecations.ts
@@ -103,6 +103,17 @@ const mapManifestServiceUrlDeprecation: ConfigDeprecation = (settings, fromPath,
   return settings;
 };
 
+const opsLoggingEventDeprecation: ConfigDeprecation = (settings, fromPath, log) => {
+  if (has(settings, 'logging.events.ops')) {
+    log(
+      '"logging.events.ops" has been deprecated and will be removed ' +
+        'in 8.0. To access ops data moving forward, please enable debug logs for the ' +
+        '"metrics.ops" context in your logging configuration.'
+    );
+  }
+  return settings;
+};
+
 export const coreDeprecationProvider: ConfigDeprecationProvider = ({ rename, unusedFromRoot }) => [
   unusedFromRoot('savedObjects.indexCheckTimeout'),
   unusedFromRoot('server.xsrf.token'),
@@ -137,4 +148,5 @@ export const coreDeprecationProvider: ConfigDeprecationProvider = ({ rename, unu
   rewriteBasePathDeprecation,
   cspRulesDeprecation,
   mapManifestServiceUrlDeprecation,
+  opsLoggingEventDeprecation,
 ];
diff --git a/src/core/server/logging/README.md b/src/core/server/logging/README.md
index 8cb704f09ce8cd..cc2b6230d2d332 100644
--- a/src/core/server/logging/README.md
+++ b/src/core/server/logging/README.md
@@ -312,6 +312,9 @@ logging:
   - context: telemetry
     level: all
     appenders: [json-file-appender]
+  - context: metrics.ops
+    level: debug
+    appenders: [console]
 ```
 
 Here is what we get with the config above:
@@ -324,6 +327,7 @@ Here is what we get with the config above:
 | server | console, file | fatal |
 | optimize | console | error |
 | telemetry | json-file-appender | all |
+| metrics.ops | console | debug |
 
 The `root` logger has a dedicated configuration node since this context is special and should always exist. By
@@ -341,7 +345,25 @@ Or disable logging entirely with `off`:
 ```yaml
 logging.root.level: off
 ```
+### Dedicated loggers
+
+The `metrics.ops` logger is configured with the `debug` level and will automatically output a sample of system and process information at a regular interval.
+The metrics that are logged are a subset of the data collected and are formatted in the log message as follows:
+
+| Ops formatted log property | Location in metrics service | Log units |
+| :------------------------- | :-------------------------- | :-------------------------- |
+| memory | process.memory.heap.used_in_bytes | [depends on the value](http://numeraljs.com/#format), typically MB or GB |
+| uptime | process.uptime_in_millis | HH:mm:ss |
+| load | os.load | ["load for the last 1 min", "load for the last 5 min", "load for the last 15 min"] |
+| delay | process.event_loop_delay | ms |
+
+The log interval is the same as the interval at which system and process information is refreshed, and it is configurable under `ops.interval`:
+
+```yaml
+ops.interval: 5000
+```
+The default interval is 5000ms; the minimum allowed value is 100ms.
 
 ## Usage
 
 Usage is very straightforward, one should just get a logger for a specific context and use it to log messages with
@@ -478,4 +500,4 @@ TBD
 | meta | separate property `"meta": {"to": "v8"}` | merged in log record `{... "to": "v8"}` |
 | pid | `pid: 12345` | `pid: 12345` |
 | type | N/A | `type: log` |
-| error | `{ message, name, stack }` | `{ message, name, stack, code, signal }` |
\ No newline at end of file
+| error | `{ message, name, stack }` | `{ message, name, stack, code, signal }` |
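Putting the "Dedicated loggers" table above together, the formatted message of a single ops record is the string asserted by the new unit tests later in this patch; with illustrative values it looks like:

```
memory: 100.0B uptime: 0:00:01 load: [10.00,20.00,30.00] delay: 50.000
```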
diff --git a/src/core/server/logging/ecs.ts b/src/core/server/logging/ecs.ts
new file mode 100644
index 00000000000000..0dbc403fca0b27
--- /dev/null
+++ b/src/core/server/logging/ecs.ts
@@ -0,0 +1,90 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * and the Server Side Public License, v 1; you may not use this file except in
+ * compliance with, at your election, the Elastic License or the Server Side
+ * Public License, v 1.
+ */
+
+/**
+ * Typings for some ECS fields which core uses internally.
+ * These are not a complete set of ECS typings and should not
+ * be used externally; the only types included here are ones
+ * currently used in core.
+ *
+ * @internal
+ */
+
+export interface EcsOpsMetricsEvent {
+  /**
+   * These typings were written as of ECS 1.7.0.
+   * Don't change this value without checking the rest
+   * of the types to conform to that ECS version.
+   *
+   * https://www.elastic.co/guide/en/ecs/1.7/index.html
+   */
+  ecs: { version: '1.7.0' };
+
+  // base fields
+  ['@timestamp']?: string;
+  labels?: Record<string, unknown>;
+  message?: string;
+  tags?: string[];
+  // other fields
+  process?: EcsProcessField;
+  event?: EcsEventField;
+}
+
+interface EcsProcessField {
+  uptime?: number;
+}
+
+export interface EcsEventField {
+  kind?: EcsEventKind;
+  category?: EcsEventCategory[];
+  type?: EcsEventType;
+}
+
+export enum EcsEventKind {
+  ALERT = 'alert',
+  EVENT = 'event',
+  METRIC = 'metric',
+  STATE = 'state',
+  PIPELINE_ERROR = 'pipeline_error',
+  SIGNAL = 'signal',
+}
+
+export enum EcsEventCategory {
+  AUTHENTICATION = 'authentication',
+  CONFIGURATION = 'configuration',
+  DATABASE = 'database',
+  DRIVER = 'driver',
+  FILE = 'file',
+  HOST = 'host',
+  IAM = 'iam',
+  INTRUSION_DETECTION = 'intrusion_detection',
+  MALWARE = 'malware',
+  NETWORK = 'network',
+  PACKAGE = 'package',
+  PROCESS = 'process',
+  WEB = 'web',
+}
+
+export enum EcsEventType {
+  ACCESS = 'access',
+  ADMIN = 'admin',
+  ALLOWED = 'allowed',
+  CHANGE = 'change',
+  CONNECTION = 'connection',
+  CREATION = 'creation',
+  DELETION = 'deletion',
+  DENIED = 'denied',
+  END = 'end',
+  ERROR = 'error',
+  GROUP = 'group',
+  INFO = 'info',
+  INSTALLATION = 'installation',
+  PROTOCOL = 'protocol',
+  START = 'start',
+  USER = 'user',
+}
diff --git a/src/core/server/logging/index.ts b/src/core/server/logging/index.ts
index f024bea1bf3588..18a903af0a9fdf 100644
--- a/src/core/server/logging/index.ts
+++ b/src/core/server/logging/index.ts
@@ -17,6 +17,13 @@ export {
   LogLevelId,
   LogLevel,
 } from '@kbn/logging';
+export {
+  EcsOpsMetricsEvent,
+  EcsEventField,
+  EcsEventKind,
+  EcsEventCategory,
+  EcsEventType,
+} from './ecs';
 export {
   config,
   LoggingConfigType,
+ */ + +import { OpsMetrics } from '..'; +import { getEcsOpsMetricsLog } from './get_ops_metrics_log'; + +function createBaseOpsMetrics(): OpsMetrics { + return { + collected_at: new Date('2020-01-01 01:00:00'), + process: { + memory: { + heap: { total_in_bytes: 1, used_in_bytes: 1, size_limit: 1 }, + resident_set_size_in_bytes: 1, + }, + event_loop_delay: 1, + pid: 1, + uptime_in_millis: 1, + }, + os: { + platform: 'darwin' as const, + platformRelease: 'test', + load: { '1m': 1, '5m': 1, '15m': 1 }, + memory: { total_in_bytes: 1, free_in_bytes: 1, used_in_bytes: 1 }, + uptime_in_millis: 1, + }, + response_times: { avg_in_millis: 1, max_in_millis: 1 }, + requests: { disconnects: 1, total: 1, statusCodes: { '200': 1 } }, + concurrent_connections: 1, + }; +} + +function createMockOpsMetrics(testMetrics: Partial): OpsMetrics { + const base = createBaseOpsMetrics(); + return { + ...base, + ...testMetrics, + }; +} +const testMetrics = ({ + process: { + memory: { heap: { used_in_bytes: 100 } }, + uptime_in_millis: 1500, + event_loop_delay: 50, + }, + os: { + load: { + '1m': 10, + '5m': 20, + '15m': 30, + }, + }, +} as unknown) as Partial; + +describe('getEcsOpsMetricsLog', () => { + it('provides correctly formatted message', () => { + const result = getEcsOpsMetricsLog(createMockOpsMetrics(testMetrics)); + expect(result.message).toMatchInlineSnapshot( + `"memory: 100.0B uptime: 0:00:01 load: [10.00,20.00,30.00] delay: 50.000"` + ); + }); + + it('correctly formats process uptime', () => { + const logMeta = getEcsOpsMetricsLog(createMockOpsMetrics(testMetrics)); + expect(logMeta.process!.uptime).toEqual(1); + }); + + it('excludes values from the message if unavailable', () => { + const baseMetrics = createBaseOpsMetrics(); + const missingMetrics = ({ + ...baseMetrics, + process: {}, + os: {}, + } as unknown) as OpsMetrics; + const logMeta = getEcsOpsMetricsLog(missingMetrics); + expect(logMeta.message).toMatchInlineSnapshot(`""`); + }); + + it('specifies correct ECS version', () => { + const logMeta = getEcsOpsMetricsLog(createBaseOpsMetrics()); + expect(logMeta.ecs.version).toBe('1.7.0'); + }); + + it('provides an ECS-compatible response', () => { + const logMeta = getEcsOpsMetricsLog(createBaseOpsMetrics()); + expect(logMeta).toMatchInlineSnapshot(` + Object { + "ecs": Object { + "version": "1.7.0", + }, + "event": Object { + "category": Array [ + "process", + "host", + ], + "kind": "metric", + "type": "info", + }, + "host": Object { + "os": Object { + "load": Object { + "15m": 1, + "1m": 1, + "5m": 1, + }, + }, + }, + "message": "memory: 1.0B load: [1.00,1.00,1.00] delay: 1.000", + "process": Object { + "eventLoopDelay": 1, + "memory": Object { + "heap": Object { + "usedInBytes": 1, + }, + }, + "uptime": 0, + }, + } + `); + }); + + it('logs ECS fields in the log meta', () => { + const logMeta = getEcsOpsMetricsLog(createBaseOpsMetrics()); + expect(logMeta.event!.kind).toBe('metric'); + expect(logMeta.event!.category).toEqual(expect.arrayContaining(['process', 'host'])); + expect(logMeta.event!.type).toBe('info'); + }); +}); diff --git a/src/core/server/metrics/logging/get_ops_metrics_log.ts b/src/core/server/metrics/logging/get_ops_metrics_log.ts new file mode 100644 index 00000000000000..361cac0bc310c5 --- /dev/null +++ b/src/core/server/metrics/logging/get_ops_metrics_log.ts @@ -0,0 +1,78 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
diff --git a/src/core/server/metrics/logging/get_ops_metrics_log.ts b/src/core/server/metrics/logging/get_ops_metrics_log.ts
new file mode 100644
index 00000000000000..361cac0bc310c5
--- /dev/null
+++ b/src/core/server/metrics/logging/get_ops_metrics_log.ts
@@ -0,0 +1,78 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * and the Server Side Public License, v 1; you may not use this file except in
+ * compliance with, at your election, the Elastic License or the Server Side
+ * Public License, v 1.
+ */
+
+import numeral from '@elastic/numeral';
+import { EcsOpsMetricsEvent, EcsEventKind, EcsEventCategory, EcsEventType } from '../../logging';
+import { OpsMetrics } from '..';
+
+const ECS_VERSION = '1.7.0';
+/**
+ * Converts ops metrics into ECS-compliant `LogMeta` for logging
+ *
+ * @internal
+ */
+export function getEcsOpsMetricsLog(metrics: OpsMetrics): EcsOpsMetricsEvent {
+  const { process, os } = metrics;
+  const processMemoryUsedInBytes = process?.memory?.heap?.used_in_bytes;
+  const processMemoryUsedInBytesMsg = processMemoryUsedInBytes
+    ? `memory: ${numeral(processMemoryUsedInBytes).format('0.0b')} `
+    : '';
+
+  // ECS process.uptime is in seconds:
+  const uptimeVal = process?.uptime_in_millis
+    ? Math.floor(process.uptime_in_millis / 1000)
+    : undefined;
+
+  // HH:mm:ss message format for backward compatibility
+  const uptimeValMsg = uptimeVal ? `uptime: ${numeral(uptimeVal).format('00:00:00')} ` : '';
+
+  // Event loop delay is in ms
+  const eventLoopDelayVal = process?.event_loop_delay;
+  const eventLoopDelayValMsg = eventLoopDelayVal
+    ? `delay: ${numeral(process?.event_loop_delay).format('0.000')}`
+    : '';
+
+  const loadEntries = {
+    '1m': os?.load ? os?.load['1m'] : undefined,
+    '5m': os?.load ? os?.load['5m'] : undefined,
+    '15m': os?.load ? os?.load['15m'] : undefined,
+  };
+
+  const loadVals = [...Object.values(os?.load ?? [])];
+  const loadValsMsg =
+    loadVals.length > 0
+      ? `load: [${loadVals.map((val: number) => {
+          return numeral(val).format('0.00');
+        })}] `
+      : '';
+
+  return {
+    ecs: { version: ECS_VERSION },
+    message: `${processMemoryUsedInBytesMsg}${uptimeValMsg}${loadValsMsg}${eventLoopDelayValMsg}`,
+    event: {
+      kind: EcsEventKind.METRIC,
+      category: [EcsEventCategory.PROCESS, EcsEventCategory.HOST],
+      type: EcsEventType.INFO,
+    },
+    process: {
+      uptime: uptimeVal,
+      // @ts-expect-error custom fields not yet part of ECS
+      memory: {
+        heap: {
+          usedInBytes: processMemoryUsedInBytes,
+        },
+      },
+      eventLoopDelay: eventLoopDelayVal,
+    },
+    host: {
+      os: {
+        load: loadEntries,
+      },
+    },
+  };
+}
diff --git a/src/core/server/metrics/logging/index.ts b/src/core/server/metrics/logging/index.ts
new file mode 100644
index 00000000000000..5b3f9aed56be0f
--- /dev/null
+++ b/src/core/server/metrics/logging/index.ts
@@ -0,0 +1,9 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * and the Server Side Public License, v 1; you may not use this file except in
+ * compliance with, at your election, the Elastic License or the Server Side
+ * Public License, v 1.
+ */
+
+export { getEcsOpsMetricsLog } from './get_ops_metrics_log';
+ */ + +export { getEcsOpsMetricsLog } from './get_ops_metrics_log'; diff --git a/src/core/server/metrics/metrics_service.test.ts b/src/core/server/metrics/metrics_service.test.ts index 5ba29606d8a809..e21bad1ef4be77 100644 --- a/src/core/server/metrics/metrics_service.test.ts +++ b/src/core/server/metrics/metrics_service.test.ts @@ -13,12 +13,15 @@ import { mockOpsCollector } from './metrics_service.test.mocks'; import { MetricsService } from './metrics_service'; import { mockCoreContext } from '../core_context.mock'; import { httpServiceMock } from '../http/http_service.mock'; +import { loggingSystemMock } from '../logging/logging_system.mock'; import { take } from 'rxjs/operators'; const testInterval = 100; const dummyMetrics = { metricA: 'value', metricB: 'otherValue' }; +const logger = loggingSystemMock.create(); + describe('MetricsService', () => { const httpMock = httpServiceMock.createInternalSetupContract(); let metricsService: MetricsService; @@ -29,7 +32,7 @@ describe('MetricsService', () => { const configService = configServiceMock.create({ atPath: { interval: moment.duration(testInterval) }, }); - const coreContext = mockCoreContext.create({ configService }); + const coreContext = mockCoreContext.create({ logger, configService }); metricsService = new MetricsService(coreContext); }); @@ -118,6 +121,100 @@ describe('MetricsService', () => { expect(await nextEmission()).toEqual({ metric: 'first' }); expect(await nextEmission()).toEqual({ metric: 'second' }); }); + + it('logs the metrics at every interval', async () => { + const firstMetrics = { + process: { + memory: { heap: { used_in_bytes: 100 } }, + uptime_in_millis: 1500, + event_loop_delay: 50, + }, + os: { + load: { + '1m': 10, + '5m': 20, + '15m': 30, + }, + }, + }; + const secondMetrics = { + process: { + memory: { heap: { used_in_bytes: 200 } }, + uptime_in_millis: 3000, + event_loop_delay: 100, + }, + os: { + load: { + '1m': 20, + '5m': 30, + '15m': 40, + }, + }, + }; + + const opsLogger = logger.get('metrics', 'ops'); + + mockOpsCollector.collect + .mockResolvedValueOnce(firstMetrics) + .mockResolvedValueOnce(secondMetrics); + await metricsService.setup({ http: httpMock }); + const { getOpsMetrics$ } = await metricsService.start(); + + const nextEmission = async () => { + jest.advanceTimersByTime(testInterval); + const emission = await getOpsMetrics$().pipe(take(1)).toPromise(); + await new Promise((resolve) => process.nextTick(resolve)); + return emission; + }; + + await nextEmission(); + const opsLogs = loggingSystemMock.collect(opsLogger).debug; + expect(opsLogs.length).toEqual(2); + expect(opsLogs[0][1]).not.toEqual(opsLogs[1][1]); + }); + + it('omits metrics from log message if they are missing or malformed', async () => { + const opsLogger = logger.get('metrics', 'ops'); + mockOpsCollector.collect.mockResolvedValueOnce({ secondMetrics: 'metrics' }); + await metricsService.setup({ http: httpMock }); + await metricsService.start(); + expect(loggingSystemMock.collect(opsLogger).debug[0]).toMatchInlineSnapshot(` + Array [ + "", + Object { + "ecs": Object { + "version": "1.7.0", + }, + "event": Object { + "category": Array [ + "process", + "host", + ], + "kind": "metric", + "type": "info", + }, + "host": Object { + "os": Object { + "load": Object { + "15m": undefined, + "1m": undefined, + "5m": undefined, + }, + }, + }, + "process": Object { + "eventLoopDelay": undefined, + "memory": Object { + "heap": Object { + "usedInBytes": undefined, + }, + }, + "uptime": undefined, + }, + }, + ] + `); + }); }); 
   describe('#stop', () => {
diff --git a/src/core/server/metrics/metrics_service.ts b/src/core/server/metrics/metrics_service.ts
index 24b2b1b67a07a3..460035ad2e2989 100644
--- a/src/core/server/metrics/metrics_service.ts
+++ b/src/core/server/metrics/metrics_service.ts
@@ -15,6 +15,7 @@ import { InternalHttpServiceSetup } from '../http';
 import { InternalMetricsServiceSetup, InternalMetricsServiceStart, OpsMetrics } from './types';
 import { OpsMetricsCollector } from './ops_metrics_collector';
 import { opsConfig, OpsConfigType } from './ops_config';
+import { getEcsOpsMetricsLog } from './logging';
 
 interface MetricsServiceSetupDeps {
   http: InternalHttpServiceSetup;
@@ -24,6 +25,7 @@ export class MetricsService
   implements CoreService<InternalMetricsServiceSetup, InternalMetricsServiceStart> {
   private readonly logger: Logger;
+  private readonly opsMetricsLogger: Logger;
   private metricsCollector?: OpsMetricsCollector;
   private collectInterval?: NodeJS.Timeout;
   private metrics$ = new ReplaySubject<OpsMetrics>(1);
@@ -31,6 +33,7 @@ export class MetricsService
 
   constructor(private readonly coreContext: CoreContext) {
     this.logger = coreContext.logger.get('metrics');
+    this.opsMetricsLogger = coreContext.logger.get('metrics', 'ops');
   }
 
   public async setup({ http }: MetricsServiceSetupDeps): Promise<InternalMetricsServiceSetup> {
@@ -69,8 +72,9 @@ export class MetricsService
   }
 
   private async refreshMetrics() {
-    this.logger.debug('Refreshing metrics');
     const metrics = await this.metricsCollector!.collect();
+    const { message, ...meta } = getEcsOpsMetricsLog(metrics);
+    this.opsMetricsLogger.debug(message!, meta);
     this.metricsCollector!.reset();
     this.metrics$.next(metrics);
   }
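The `meta` object that `refreshMetrics()` now passes to `opsMetricsLogger.debug()` is the ECS document built by `getEcsOpsMetricsLog()`; the `message` property is split off and logged as the record's message. With the illustrative values used in the tests above it would look roughly like the following sketch (how it is serialized ultimately depends on the configured appender and layout):

```json
{
  "ecs": { "version": "1.7.0" },
  "event": { "kind": "metric", "category": ["process", "host"], "type": "info" },
  "host": { "os": { "load": { "1m": 10, "5m": 20, "15m": 30 } } },
  "process": {
    "uptime": 1,
    "memory": { "heap": { "usedInBytes": 100 } },
    "eventLoopDelay": 50
  }
}
```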