diff --git a/x-pack/plugins/ml/common/types/ml_server_info.ts b/x-pack/plugins/ml/common/types/ml_server_info.ts index 26dd1758827b4..66142f53add3a 100644 --- a/x-pack/plugins/ml/common/types/ml_server_info.ts +++ b/x-pack/plugins/ml/common/types/ml_server_info.ts @@ -18,6 +18,7 @@ export interface MlServerDefaults { export interface MlServerLimits { max_model_memory_limit?: string; + effective_max_model_memory_limit?: string; } export interface MlInfoResponse { diff --git a/x-pack/plugins/ml/server/models/calculate_model_memory_limit/calculate_model_memory_limit.ts b/x-pack/plugins/ml/server/models/calculate_model_memory_limit/calculate_model_memory_limit.ts index cd61dd9eddcdd..1cc2a07ddbc88 100644 --- a/x-pack/plugins/ml/server/models/calculate_model_memory_limit/calculate_model_memory_limit.ts +++ b/x-pack/plugins/ml/server/models/calculate_model_memory_limit/calculate_model_memory_limit.ts @@ -9,6 +9,7 @@ import { APICaller } from 'kibana/server'; import { MLCATEGORY } from '../../../common/constants/field_types'; import { AnalysisConfig } from '../../../common/types/anomaly_detection_jobs'; import { fieldsServiceProvider } from '../fields_service'; +import { MlInfoResponse } from '../../../common/types/ml_server_info'; interface ModelMemoryEstimationResult { /** @@ -139,15 +140,9 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller) latestMs: number, allowMMLGreaterThanMax = false ): Promise { - let maxModelMemoryLimit; - try { - const resp = await callAsCurrentUser('ml.info'); - if (resp?.limits?.max_model_memory_limit !== undefined) { - maxModelMemoryLimit = resp.limits.max_model_memory_limit.toUpperCase(); - } - } catch (e) { - throw new Error('Unable to retrieve max model memory limit'); - } + const info = await callAsCurrentUser('ml.info'); + const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase(); + const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase(); const { overallCardinality, maxBucketCardinality } = await getCardinalities( analysisConfig, @@ -168,17 +163,32 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller) }) ).model_memory_estimate.toUpperCase(); - let modelMemoryLimit: string = estimatedModelMemoryLimit; + let modelMemoryLimit = estimatedModelMemoryLimit; + let mmlCappedAtMax = false; // if max_model_memory_limit has been set, // make sure the estimated value is not greater than it. - if (!allowMMLGreaterThanMax && maxModelMemoryLimit !== undefined) { - // @ts-ignore - const maxBytes = numeral(maxModelMemoryLimit).value(); + if (allowMMLGreaterThanMax === false) { // @ts-ignore const mmlBytes = numeral(estimatedModelMemoryLimit).value(); - if (mmlBytes > maxBytes) { + if (maxModelMemoryLimit !== undefined) { + // @ts-ignore + const maxBytes = numeral(maxModelMemoryLimit).value(); + if (mmlBytes > maxBytes) { + // @ts-ignore + modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`; + mmlCappedAtMax = true; + } + } + + // if we've not already capped the estimated mml at the hard max server setting + // ensure that the estimated mml isn't greater than the effective max mml + if (mmlCappedAtMax === false && effectiveMaxModelMemoryLimit !== undefined) { // @ts-ignore - modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`; + const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value(); + if (mmlBytes > effectiveMaxMmlBytes) { + // @ts-ignore + modelMemoryLimit = `${Math.floor(effectiveMaxMmlBytes / numeral('1MB').value())}MB`; + } } } @@ -186,6 +196,7 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller) estimatedModelMemoryLimit, modelMemoryLimit, ...(maxModelMemoryLimit ? { maxModelMemoryLimit } : {}), + ...(effectiveMaxModelMemoryLimit ? { effectiveMaxModelMemoryLimit } : {}), }; }; } diff --git a/x-pack/plugins/ml/server/models/job_validation/messages.js b/x-pack/plugins/ml/server/models/job_validation/messages.js index 3fd90d0a356a1..6cdbc457e6ade 100644 --- a/x-pack/plugins/ml/server/models/job_validation/messages.js +++ b/x-pack/plugins/ml/server/models/job_validation/messages.js @@ -433,6 +433,17 @@ export const getMessages = () => { } ), }, + mml_greater_than_effective_max_mml: { + status: 'WARNING', + text: i18n.translate( + 'xpack.ml.models.jobValidation.messages.mmlGreaterThanEffectiveMaxMmlMessage', + { + defaultMessage: + 'Job will not be able to run in the current cluster because model memory limit is higher than {effectiveMaxModelMemoryLimit}.', + values: { effectiveMaxModelMemoryLimit: '{{effectiveMaxModelMemoryLimit}}' }, + } + ), + }, mml_greater_than_max_mml: { status: 'ERROR', text: i18n.translate('xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage', { diff --git a/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.test.ts b/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.test.ts index 6b5d5614325bf..bf88716181bb3 100644 --- a/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.test.ts +++ b/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.test.ts @@ -24,6 +24,7 @@ describe('ML - validateModelMemoryLimit', () => { }, limits: { max_model_memory_limit: '30mb', + effective_max_model_memory_limit: '40mb', }, }; @@ -211,6 +212,30 @@ describe('ML - validateModelMemoryLimit', () => { }); }); + it('Called with no duration or split and mml above limit, no max setting', () => { + const job = getJobConfig(); + const duration = undefined; + // @ts-ignore + job.analysis_limits.model_memory_limit = '31mb'; + + return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => { + const ids = messages.map(m => m.id); + expect(ids).toEqual([]); + }); + }); + + it('Called with no duration or split and mml above limit, no max setting, above effective max mml', () => { + const job = getJobConfig(); + const duration = undefined; + // @ts-ignore + job.analysis_limits.model_memory_limit = '41mb'; + + return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => { + const ids = messages.map(m => m.id); + expect(ids).toEqual(['mml_greater_than_effective_max_mml']); + }); + }); + it('Called with small number of detectors, so estimated mml is under specified mml, no max setting', () => { const dtrs = createDetectors(1); const job = getJobConfig(['instance'], dtrs); diff --git a/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.ts b/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.ts index 16a48addfeaf4..5c3250af6ef46 100644 --- a/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.ts +++ b/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.ts @@ -10,6 +10,7 @@ import { CombinedJob } from '../../../common/types/anomaly_detection_jobs'; import { validateJobObject } from './validate_job_object'; import { calculateModelMemoryLimitProvider } from '../calculate_model_memory_limit'; import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation'; +import { MlInfoResponse } from '../../../common/types/ml_server_info'; // The minimum value the backend expects is 1MByte const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1048576; @@ -50,9 +51,9 @@ export async function validateModelMemoryLimit( // retrieve the max_model_memory_limit value from the server // this will be unset unless the user has set this on their cluster - const maxModelMemoryLimit: string | undefined = ( - await callWithRequest('ml.info') - )?.limits?.max_model_memory_limit?.toUpperCase(); + const info = await callWithRequest('ml.info'); + const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase(); + const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase(); if (runCalcModelMemoryTest) { const { modelMemoryLimit } = await calculateModelMemoryLimitProvider(callWithRequest)( @@ -113,17 +114,35 @@ export async function validateModelMemoryLimit( // if max_model_memory_limit has been set, // make sure the user defined MML is not greater than it - if (maxModelMemoryLimit !== undefined && mml !== null) { - // @ts-ignore - const maxMmlBytes = numeral(maxModelMemoryLimit).value(); + if (mml !== null) { + let maxMmlExceeded = false; // @ts-ignore const mmlBytes = numeral(mml).value(); - if (mmlBytes > maxMmlBytes) { - messages.push({ - id: 'mml_greater_than_max_mml', - maxModelMemoryLimit, - mml, - }); + + if (maxModelMemoryLimit !== undefined) { + // @ts-ignore + const maxMmlBytes = numeral(maxModelMemoryLimit).value(); + if (mmlBytes > maxMmlBytes) { + maxMmlExceeded = true; + messages.push({ + id: 'mml_greater_than_max_mml', + maxModelMemoryLimit, + mml, + }); + } + } + + if (effectiveMaxModelMemoryLimit !== undefined && maxMmlExceeded === false) { + // @ts-ignore + const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value(); + if (mmlBytes > effectiveMaxMmlBytes) { + messages.push({ + id: 'mml_greater_than_effective_max_mml', + maxModelMemoryLimit, + mml, + effectiveMaxModelMemoryLimit, + }); + } } } diff --git a/x-pack/plugins/translations/translations/ja-JP.json b/x-pack/plugins/translations/translations/ja-JP.json index b19837c59ab97..f9ac675cbbd9e 100644 --- a/x-pack/plugins/translations/translations/ja-JP.json +++ b/x-pack/plugins/translations/translations/ja-JP.json @@ -10060,7 +10060,6 @@ "xpack.ml.models.jobValidation.messages.jobIdInvalidMessage": "ジョブ ID が無効です。アルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーラインが使用でき、最初と最後を英数字にする必要があります。", "xpack.ml.models.jobValidation.messages.jobIdValidHeading": "ジョブ ID のフォーマットは有効です。", "xpack.ml.models.jobValidation.messages.jobIdValidMessage": "アルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーライン、最初と最後を英数字にし、{maxLength, plural, one {# 文字} other {# 文字}}以内にする必要があります。", - "xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage": "モデルメモリー制限が、このクラスターに構成された最大モデルメモリー制限を超えています。", "xpack.ml.models.jobValidation.messages.mmlValueInvalidMessage": "{mml} はモデルメモリー制限の有効な値ではありません。この値は最低 1MB で、バイト (例: 10MB) で指定する必要があります。", "xpack.ml.models.jobValidation.messages.skippedExtendedTestsMessage": "ジョブの構成の基本要件が満たされていないため、他のチェックをスキップしました。", "xpack.ml.models.jobValidation.messages.successBucketSpanHeading": "バケットスパン", diff --git a/x-pack/plugins/translations/translations/zh-CN.json b/x-pack/plugins/translations/translations/zh-CN.json index 850f2fb207f40..55a59ae8aa0b4 100644 --- a/x-pack/plugins/translations/translations/zh-CN.json +++ b/x-pack/plugins/translations/translations/zh-CN.json @@ -10066,7 +10066,6 @@ "xpack.ml.models.jobValidation.messages.jobIdInvalidMessage": "作业 ID 无效.其可以包含小写字母数字(a-z 和 0-9)字符、连字符或下划线,且必须以字母数字字符开头和结尾。", "xpack.ml.models.jobValidation.messages.jobIdValidHeading": "作业 ID 格式有效", "xpack.ml.models.jobValidation.messages.jobIdValidMessage": "小写字母数字(a-z 和 0-9)字符、连字符或下划线,以字母数字字符开头和结尾,且长度不超过 {maxLength, plural, one {# 个字符} other {# 个字符}}。", - "xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage": "模型内存限制大于为此集群配置的最大模型内存限制。", "xpack.ml.models.jobValidation.messages.mmlValueInvalidMessage": "{mml} 不是有效的模型内存限制值。该值需要至少 1MB,且应以字节为单位(例如 10MB)指定。", "xpack.ml.models.jobValidation.messages.skippedExtendedTestsMessage": "已跳过其他检查,因为未满足作业配置的基本要求。", "xpack.ml.models.jobValidation.messages.successBucketSpanHeading": "存储桶跨度",