Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Show warning when the model memory limit is higher than the memory available in the ML node #65652

Merged
merged 8 commits into from
May 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions x-pack/plugins/ml/common/types/ml_server_info.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export interface MlServerDefaults {

/**
 * Model memory limits reported in the `limits` section of the ML info response.
 * Both values are optional size strings (for example `'30mb'`).
 */
export interface MlServerLimits {
  /**
   * Hard maximum model memory limit configured on the cluster.
   * Unset unless the user has set it on their cluster.
   */
  max_model_memory_limit?: string;

  /**
   * Effective maximum model memory limit for the current cluster.
   * A job whose model memory limit is higher than this value will not be
   * able to run in the current cluster.
   */
  effective_max_model_memory_limit?: string;
}

export interface MlInfoResponse {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { APICaller } from 'kibana/server';
import { MLCATEGORY } from '../../../common/constants/field_types';
import { AnalysisConfig } from '../../../common/types/anomaly_detection_jobs';
import { fieldsServiceProvider } from '../fields_service';
import { MlInfoResponse } from '../../../common/types/ml_server_info';

interface ModelMemoryEstimationResult {
/**
Expand Down Expand Up @@ -139,15 +140,9 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller)
latestMs: number,
allowMMLGreaterThanMax = false
): Promise<ModelMemoryEstimationResult> {
let maxModelMemoryLimit;
try {
const resp = await callAsCurrentUser('ml.info');
if (resp?.limits?.max_model_memory_limit !== undefined) {
maxModelMemoryLimit = resp.limits.max_model_memory_limit.toUpperCase();
}
} catch (e) {
throw new Error('Unable to retrieve max model memory limit');
}
const info = await callAsCurrentUser<MlInfoResponse>('ml.info');
const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase();
const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase();

const { overallCardinality, maxBucketCardinality } = await getCardinalities(
analysisConfig,
Expand All @@ -168,24 +163,40 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller)
})
).model_memory_estimate.toUpperCase();

let modelMemoryLimit: string = estimatedModelMemoryLimit;
let modelMemoryLimit = estimatedModelMemoryLimit;
let mmlCappedAtMax = false;
// if max_model_memory_limit has been set,
// make sure the estimated value is not greater than it.
if (!allowMMLGreaterThanMax && maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxBytes = numeral(maxModelMemoryLimit).value();
if (allowMMLGreaterThanMax === false) {
// @ts-ignore
const mmlBytes = numeral(estimatedModelMemoryLimit).value();
if (mmlBytes > maxBytes) {
if (maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxBytes = numeral(maxModelMemoryLimit).value();
if (mmlBytes > maxBytes) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
mmlCappedAtMax = true;
}
}

// if we've not already capped the estimated mml at the hard max server setting
// ensure that the estimated mml isn't greater than the effective max mml
if (mmlCappedAtMax === false && effectiveMaxModelMemoryLimit !== undefined) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value();
if (mmlBytes > effectiveMaxMmlBytes) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(effectiveMaxMmlBytes / numeral('1MB').value())}MB`;
}
}
}

return {
estimatedModelMemoryLimit,
modelMemoryLimit,
...(maxModelMemoryLimit ? { maxModelMemoryLimit } : {}),
...(effectiveMaxModelMemoryLimit ? { effectiveMaxModelMemoryLimit } : {}),
};
};
}
11 changes: 11 additions & 0 deletions x-pack/plugins/ml/server/models/job_validation/messages.js
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,17 @@ export const getMessages = () => {
}
),
},
// Validation message for a job whose model memory limit is higher than the
// effective maximum model memory limit. Reported with WARNING status.
// NOTE(review): the '{{effectiveMaxModelMemoryLimit}}' value looks like a template
// placeholder that is substituted with the real limit later — confirm against the caller.
mml_greater_than_effective_max_mml: {
status: 'WARNING',
text: i18n.translate(
'xpack.ml.models.jobValidation.messages.mmlGreaterThanEffectiveMaxMmlMessage',
{
defaultMessage:
'Job will not be able to run in the current cluster because model memory limit is higher than {effectiveMaxModelMemoryLimit}.',
values: { effectiveMaxModelMemoryLimit: '{{effectiveMaxModelMemoryLimit}}' },
}
),
},
mml_greater_than_max_mml: {
status: 'ERROR',
text: i18n.translate('xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage', {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ describe('ML - validateModelMemoryLimit', () => {
},
limits: {
max_model_memory_limit: '30mb',
effective_max_model_memory_limit: '40mb',
},
};

Expand Down Expand Up @@ -211,6 +212,30 @@ describe('ML - validateModelMemoryLimit', () => {
});
});

it('Called with no duration or split and mml above limit, no max setting', async () => {
  // Job with a 31mb model memory limit, validated without a duration.
  const job = getJobConfig();
  // @ts-ignore
  job.analysis_limits.model_memory_limit = '31mb';

  const messages = await validateModelMemoryLimit(getMockCallWithRequest(), job, undefined);

  // No validation messages are expected for this configuration.
  const messageIds = messages.map(message => message.id);
  expect(messageIds).toEqual([]);
});

it('Called with no duration or split and mml above limit, no max setting, above effective max mml', async () => {
  // Job with a 41mb model memory limit, validated without a duration.
  const job = getJobConfig();
  // @ts-ignore
  job.analysis_limits.model_memory_limit = '41mb';

  const messages = await validateModelMemoryLimit(getMockCallWithRequest(), job, undefined);

  // Only the effective-max warning is expected.
  const messageIds = messages.map(message => message.id);
  expect(messageIds).toEqual(['mml_greater_than_effective_max_mml']);
});

it('Called with small number of detectors, so estimated mml is under specified mml, no max setting', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
import { validateJobObject } from './validate_job_object';
import { calculateModelMemoryLimitProvider } from '../calculate_model_memory_limit';
import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';
import { MlInfoResponse } from '../../../common/types/ml_server_info';

// Lower bound for a model memory limit: the backend expects at least 1MB (expressed in bytes).
const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1024 * 1024;
Expand Down Expand Up @@ -50,9 +51,9 @@ export async function validateModelMemoryLimit(

// retrieve the max_model_memory_limit value from the server
// this will be unset unless the user has set this on their cluster
const maxModelMemoryLimit: string | undefined = (
await callWithRequest('ml.info')
)?.limits?.max_model_memory_limit?.toUpperCase();
const info = await callWithRequest<MlInfoResponse>('ml.info');
const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase();
const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase();

if (runCalcModelMemoryTest) {
const { modelMemoryLimit } = await calculateModelMemoryLimitProvider(callWithRequest)(
Expand Down Expand Up @@ -113,17 +114,35 @@ export async function validateModelMemoryLimit(

// if max_model_memory_limit has been set,
// make sure the user defined MML is not greater than it
if (maxModelMemoryLimit !== undefined && mml !== null) {
// @ts-ignore
const maxMmlBytes = numeral(maxModelMemoryLimit).value();
if (mml !== null) {
let maxMmlExceeded = false;
// @ts-ignore
const mmlBytes = numeral(mml).value();
if (mmlBytes > maxMmlBytes) {
messages.push({
id: 'mml_greater_than_max_mml',
maxModelMemoryLimit,
mml,
});

if (maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxMmlBytes = numeral(maxModelMemoryLimit).value();
if (mmlBytes > maxMmlBytes) {
maxMmlExceeded = true;
messages.push({
id: 'mml_greater_than_max_mml',
maxModelMemoryLimit,
mml,
});
}
}

if (effectiveMaxModelMemoryLimit !== undefined && maxMmlExceeded === false) {
// @ts-ignore
const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value();
if (mmlBytes > effectiveMaxMmlBytes) {
messages.push({
id: 'mml_greater_than_effective_max_mml',
maxModelMemoryLimit,
mml,
effectiveMaxModelMemoryLimit,
});
}
}
}

Expand Down
1 change: 0 additions & 1 deletion x-pack/plugins/translations/translations/ja-JP.json
Original file line number Diff line number Diff line change
Expand Up @@ -10054,7 +10054,6 @@
"xpack.ml.models.jobValidation.messages.jobIdInvalidMessage": "ジョブ ID が無効です。アルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーラインが使用でき、最初と最後を英数字にする必要があります。",
"xpack.ml.models.jobValidation.messages.jobIdValidHeading": "ジョブ ID のフォーマットは有効です。",
"xpack.ml.models.jobValidation.messages.jobIdValidMessage": "アルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーライン、最初と最後を英数字にし、{maxLength, plural, one {# 文字} other {# 文字}}以内にする必要があります。",
"xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage": "モデルメモリー制限が、このクラスターに構成された最大モデルメモリー制限を超えています。",
"xpack.ml.models.jobValidation.messages.mmlValueInvalidMessage": "{mml} はモデルメモリー制限の有効な値ではありません。この値は最低 1MB で、バイト (例: 10MB) で指定する必要があります。",
"xpack.ml.models.jobValidation.messages.skippedExtendedTestsMessage": "ジョブの構成の基本要件が満たされていないため、他のチェックをスキップしました。",
"xpack.ml.models.jobValidation.messages.successBucketSpanHeading": "バケットスパン",
Expand Down
1 change: 0 additions & 1 deletion x-pack/plugins/translations/translations/zh-CN.json
Original file line number Diff line number Diff line change
Expand Up @@ -10060,7 +10060,6 @@
"xpack.ml.models.jobValidation.messages.jobIdInvalidMessage": "作业 ID 无效.其可以包含小写字母数字(a-z 和 0-9)字符、连字符或下划线,且必须以字母数字字符开头和结尾。",
"xpack.ml.models.jobValidation.messages.jobIdValidHeading": "作业 ID 格式有效",
"xpack.ml.models.jobValidation.messages.jobIdValidMessage": "小写字母数字(a-z 和 0-9)字符、连字符或下划线,以字母数字字符开头和结尾,且长度不超过 {maxLength, plural, one {# 个字符} other {# 个字符}}。",
"xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage": "模型内存限制大于为此集群配置的最大模型内存限制。",
"xpack.ml.models.jobValidation.messages.mmlValueInvalidMessage": "{mml} 不是有效的模型内存限制值。该值需要至少 1MB,且应以字节为单位(例如 10MB)指定。",
"xpack.ml.models.jobValidation.messages.skippedExtendedTestsMessage": "已跳过其他检查,因为未满足作业配置的基本要求。",
"xpack.ml.models.jobValidation.messages.successBucketSpanHeading": "存储桶跨度",
Expand Down