Skip to content

Commit

Permalink
[ML] Show warning when the model memory limit is higher than the memory available in the ML node (#65652) (#65842)
Browse files Browse the repository at this point in the history

* [ML] Show warning when the model memory limit is higher than the memory available in the ML node

* reverting UI check

* removing from UI job validator

* adding cap to estimate mml

* adding mml value to message

* fixing translations

* updating translations

* fixing translation ids

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
  • Loading branch information
jgowdyelastic and elasticmachine authored May 11, 2020
1 parent ed651a8 commit d11439b
Show file tree
Hide file tree
Showing 7 changed files with 94 additions and 29 deletions.
1 change: 1 addition & 0 deletions x-pack/plugins/ml/common/types/ml_server_info.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export interface MlServerDefaults {

/**
 * Model-memory limits reported by the ML server.
 * Both fields are optional strings; callers must handle either being absent.
 * NOTE(review): presumably this is the shape of `limits` in the `ml.info` response — confirm against MlInfoResponse.
 */
export interface MlServerLimits {
// Configured maximum model memory limit, as a size string (e.g. '30mb').
max_model_memory_limit?: string;
// Effective maximum model memory limit, as a size string (e.g. '40mb').
// NOTE(review): presumably derived from the memory available on the ML node — confirm against the ML info API docs.
effective_max_model_memory_limit?: string;
}

export interface MlInfoResponse {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { APICaller } from 'kibana/server';
import { MLCATEGORY } from '../../../common/constants/field_types';
import { AnalysisConfig } from '../../../common/types/anomaly_detection_jobs';
import { fieldsServiceProvider } from '../fields_service';
import { MlInfoResponse } from '../../../common/types/ml_server_info';

interface ModelMemoryEstimationResult {
/**
Expand Down Expand Up @@ -139,15 +140,9 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller)
latestMs: number,
allowMMLGreaterThanMax = false
): Promise<ModelMemoryEstimationResult> {
let maxModelMemoryLimit;
try {
const resp = await callAsCurrentUser('ml.info');
if (resp?.limits?.max_model_memory_limit !== undefined) {
maxModelMemoryLimit = resp.limits.max_model_memory_limit.toUpperCase();
}
} catch (e) {
throw new Error('Unable to retrieve max model memory limit');
}
const info = await callAsCurrentUser<MlInfoResponse>('ml.info');
const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase();
const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase();

const { overallCardinality, maxBucketCardinality } = await getCardinalities(
analysisConfig,
Expand All @@ -168,24 +163,40 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller)
})
).model_memory_estimate.toUpperCase();

let modelMemoryLimit: string = estimatedModelMemoryLimit;
let modelMemoryLimit = estimatedModelMemoryLimit;
let mmlCappedAtMax = false;
// if max_model_memory_limit has been set,
// make sure the estimated value is not greater than it.
if (!allowMMLGreaterThanMax && maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxBytes = numeral(maxModelMemoryLimit).value();
if (allowMMLGreaterThanMax === false) {
// @ts-ignore
const mmlBytes = numeral(estimatedModelMemoryLimit).value();
if (mmlBytes > maxBytes) {
if (maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxBytes = numeral(maxModelMemoryLimit).value();
if (mmlBytes > maxBytes) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
mmlCappedAtMax = true;
}
}

// if we've not already capped the estimated mml at the hard max server setting
// ensure that the estimated mml isn't greater than the effective max mml
if (mmlCappedAtMax === false && effectiveMaxModelMemoryLimit !== undefined) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value();
if (mmlBytes > effectiveMaxMmlBytes) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(effectiveMaxMmlBytes / numeral('1MB').value())}MB`;
}
}
}

return {
estimatedModelMemoryLimit,
modelMemoryLimit,
...(maxModelMemoryLimit ? { maxModelMemoryLimit } : {}),
...(effectiveMaxModelMemoryLimit ? { effectiveMaxModelMemoryLimit } : {}),
};
};
}
11 changes: 11 additions & 0 deletions x-pack/plugins/ml/server/models/job_validation/messages.js
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,17 @@ export const getMessages = () => {
}
),
},
// Validation message used when a job's model memory limit exceeds the
// effective max model memory limit. Reported as a WARNING, whereas the
// sibling mml_greater_than_max_mml entry is an ERROR.
mml_greater_than_effective_max_mml: {
status: 'WARNING',
text: i18n.translate(
'xpack.ml.models.jobValidation.messages.mmlGreaterThanEffectiveMaxMmlMessage',
{
defaultMessage:
'Job will not be able to run in the current cluster because model memory limit is higher than {effectiveMaxModelMemoryLimit}.',
// NOTE(review): the double-brace value looks like a placeholder that is filled in
// later with the actual limit — confirm against the code that renders these messages.
values: { effectiveMaxModelMemoryLimit: '{{effectiveMaxModelMemoryLimit}}' },
}
),
},
mml_greater_than_max_mml: {
status: 'ERROR',
text: i18n.translate('xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage', {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ describe('ML - validateModelMemoryLimit', () => {
},
limits: {
max_model_memory_limit: '30mb',
effective_max_model_memory_limit: '40mb',
},
};

Expand Down Expand Up @@ -211,6 +212,30 @@ describe('ML - validateModelMemoryLimit', () => {
});
});

it('Called with no duration or split and mml above limit, no max setting', async () => {
  // With a 31mb model memory limit, the validator is expected to report no messages.
  const duration = undefined;
  const job = getJobConfig();
  // @ts-ignore
  job.analysis_limits.model_memory_limit = '31mb';

  const messages = await validateModelMemoryLimit(getMockCallWithRequest(), job, duration);
  const ids = messages.map(m => m.id);
  expect(ids).toEqual([]);
});

it('Called with no duration or split and mml above limit, no max setting, above effective max mml', async () => {
  // With a 41mb model memory limit, only the effective-max warning is expected.
  const duration = undefined;
  const job = getJobConfig();
  // @ts-ignore
  job.analysis_limits.model_memory_limit = '41mb';

  const messages = await validateModelMemoryLimit(getMockCallWithRequest(), job, duration);
  const ids = messages.map(m => m.id);
  expect(ids).toEqual(['mml_greater_than_effective_max_mml']);
});

it('Called with small number of detectors, so estimated mml is under specified mml, no max setting', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
import { validateJobObject } from './validate_job_object';
import { calculateModelMemoryLimitProvider } from '../calculate_model_memory_limit';
import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';
import { MlInfoResponse } from '../../../common/types/ml_server_info';

// The minimum value the backend expects is 1MByte
const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1048576;
Expand Down Expand Up @@ -50,9 +51,9 @@ export async function validateModelMemoryLimit(

// retrieve the max_model_memory_limit value from the server
// this will be unset unless the user has set this on their cluster
const maxModelMemoryLimit: string | undefined = (
await callWithRequest('ml.info')
)?.limits?.max_model_memory_limit?.toUpperCase();
const info = await callWithRequest<MlInfoResponse>('ml.info');
const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase();
const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase();

if (runCalcModelMemoryTest) {
const { modelMemoryLimit } = await calculateModelMemoryLimitProvider(callWithRequest)(
Expand Down Expand Up @@ -113,17 +114,35 @@ export async function validateModelMemoryLimit(

// if max_model_memory_limit has been set,
// make sure the user defined MML is not greater than it
if (maxModelMemoryLimit !== undefined && mml !== null) {
// @ts-ignore
const maxMmlBytes = numeral(maxModelMemoryLimit).value();
if (mml !== null) {
let maxMmlExceeded = false;
// @ts-ignore
const mmlBytes = numeral(mml).value();
if (mmlBytes > maxMmlBytes) {
messages.push({
id: 'mml_greater_than_max_mml',
maxModelMemoryLimit,
mml,
});

if (maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxMmlBytes = numeral(maxModelMemoryLimit).value();
if (mmlBytes > maxMmlBytes) {
maxMmlExceeded = true;
messages.push({
id: 'mml_greater_than_max_mml',
maxModelMemoryLimit,
mml,
});
}
}

if (effectiveMaxModelMemoryLimit !== undefined && maxMmlExceeded === false) {
// @ts-ignore
const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value();
if (mmlBytes > effectiveMaxMmlBytes) {
messages.push({
id: 'mml_greater_than_effective_max_mml',
maxModelMemoryLimit,
mml,
effectiveMaxModelMemoryLimit,
});
}
}
}

Expand Down
1 change: 0 additions & 1 deletion x-pack/plugins/translations/translations/ja-JP.json
Original file line number Diff line number Diff line change
Expand Up @@ -10065,7 +10065,6 @@
"xpack.ml.models.jobValidation.messages.jobIdInvalidMessage": "ジョブ ID が無効です。アルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーラインが使用でき、最初と最後を英数字にする必要があります。",
"xpack.ml.models.jobValidation.messages.jobIdValidHeading": "ジョブ ID のフォーマットは有効です。",
"xpack.ml.models.jobValidation.messages.jobIdValidMessage": "アルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーライン、最初と最後を英数字にし、{maxLength, plural, one {# 文字} other {# 文字}}以内にする必要があります。",
"xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage": "モデルメモリー制限が、このクラスターに構成された最大モデルメモリー制限を超えています。",
"xpack.ml.models.jobValidation.messages.mmlValueInvalidMessage": "{mml} はモデルメモリー制限の有効な値ではありません。この値は最低 1MB で、バイト (例: 10MB) で指定する必要があります。",
"xpack.ml.models.jobValidation.messages.skippedExtendedTestsMessage": "ジョブの構成の基本要件が満たされていないため、他のチェックをスキップしました。",
"xpack.ml.models.jobValidation.messages.successBucketSpanHeading": "バケットスパン",
Expand Down
1 change: 0 additions & 1 deletion x-pack/plugins/translations/translations/zh-CN.json
Original file line number Diff line number Diff line change
Expand Up @@ -10071,7 +10071,6 @@
"xpack.ml.models.jobValidation.messages.jobIdInvalidMessage": "作业 ID 无效.其可以包含小写字母数字(a-z 和 0-9)字符、连字符或下划线,且必须以字母数字字符开头和结尾。",
"xpack.ml.models.jobValidation.messages.jobIdValidHeading": "作业 ID 格式有效",
"xpack.ml.models.jobValidation.messages.jobIdValidMessage": "小写字母数字(a-z 和 0-9)字符、连字符或下划线,以字母数字字符开头和结尾,且长度不超过 {maxLength, plural, one {# 个字符} other {# 个字符}}。",
"xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage": "模型内存限制大于为此集群配置的最大模型内存限制。",
"xpack.ml.models.jobValidation.messages.mmlValueInvalidMessage": "{mml} 不是有效的模型内存限制值。该值需要至少 1MB,且应以字节为单位(例如 10MB)指定。",
"xpack.ml.models.jobValidation.messages.skippedExtendedTestsMessage": "已跳过其他检查,因为未满足作业配置的基本要求。",
"xpack.ml.models.jobValidation.messages.successBucketSpanHeading": "存储桶跨度",
Expand Down

0 comments on commit d11439b

Please sign in to comment.