Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Show warning when the model memory limit is higher than the memory available in the ML node #65652

Merged
merged 8 commits into from
May 8, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions x-pack/plugins/ml/common/types/ml_server_info.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export interface MlServerDefaults {

export interface MlServerLimits {
  // Hard upper bound on job model memory limit; only present when the
  // administrator has configured it on the cluster.
  max_model_memory_limit?: string;
  // Largest model memory limit a job could actually run with, based on the
  // memory available in the ML nodes (reported by the ml.info endpoint).
  effective_max_model_memory_limit?: string;
}

export interface MlInfoResponse {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { APICaller } from 'kibana/server';
import { MLCATEGORY } from '../../../common/constants/field_types';
import { AnalysisConfig } from '../../../common/types/anomaly_detection_jobs';
import { fieldsServiceProvider } from '../fields_service';
import { MlInfoResponse } from '../../../common/types/ml_server_info';

interface ModelMemoryEstimationResult {
/**
Expand Down Expand Up @@ -139,15 +140,9 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller)
latestMs: number,
allowMMLGreaterThanMax = false
): Promise<ModelMemoryEstimationResult> {
// Fetch the server-side memory limits from ml.info.
// `max_model_memory_limit` is unset unless the administrator has configured
// it; `effective_max_model_memory_limit` is the largest MML the current
// ML nodes can accommodate.
const info = await callAsCurrentUser<MlInfoResponse>('ml.info');
const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase();
const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase();

const { overallCardinality, maxBucketCardinality } = await getCardinalities(
analysisConfig,
Expand All @@ -168,24 +163,40 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller)
})
).model_memory_estimate.toUpperCase();

let modelMemoryLimit: string = estimatedModelMemoryLimit;
let modelMemoryLimit = estimatedModelMemoryLimit;
let mmlCappedAtMax = false;
// if max_model_memory_limit has been set,
// make sure the estimated value is not greater than it.
if (!allowMMLGreaterThanMax && maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxBytes = numeral(maxModelMemoryLimit).value();
if (allowMMLGreaterThanMax === false) {
// @ts-ignore
const mmlBytes = numeral(estimatedModelMemoryLimit).value();
if (mmlBytes > maxBytes) {
if (maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxBytes = numeral(maxModelMemoryLimit).value();
if (mmlBytes > maxBytes) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
mmlCappedAtMax = true;
}
}

// if we've not already capped the estimated mml at the hard max server setting
// ensure that the estimated mml isn't greater than the effective max mml
if (mmlCappedAtMax === false && effectiveMaxModelMemoryLimit !== undefined) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value();
if (mmlBytes > effectiveMaxMmlBytes) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(effectiveMaxMmlBytes / numeral('1MB').value())}MB`;
}
}
}

return {
estimatedModelMemoryLimit,
modelMemoryLimit,
...(maxModelMemoryLimit ? { maxModelMemoryLimit } : {}),
...(effectiveMaxModelMemoryLimit ? { effectiveMaxModelMemoryLimit } : {}),
};
};
}
17 changes: 14 additions & 3 deletions x-pack/plugins/ml/server/models/job_validation/messages.js
Original file line number Diff line number Diff line change
Expand Up @@ -433,13 +433,24 @@ export const getMessages = () => {
}
),
},
mml_greater_than_max_mml: {
status: 'ERROR',
mml_greater_than_effective_max_mml: {
status: 'WARNING',
text: i18n.translate('xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage', {
defaultMessage:
'The model memory limit is greater than the max model memory limit configured for this cluster.',
'Job will not be able to run in the current cluster because model memory limit is higher than {effectiveMaxModelMemoryLimit}.',
values: { effectiveMaxModelMemoryLimit: '{{effectiveMaxModelMemoryLimit}}' },
}),
},
mml_greater_than_max_mml: {
status: 'ERROR',
text: i18n.translate(
'xpack.ml.models.jobValidation.messages.mmlGreaterThanEffectiveMaxMmlMessage',
{
defaultMessage:
'The model memory limit is greater than the max model memory limit configured for this cluster.',
}
),
},
mml_value_invalid: {
status: 'ERROR',
text: i18n.translate('xpack.ml.models.jobValidation.messages.mmlValueInvalidMessage', {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ describe('ML - validateModelMemoryLimit', () => {
},
limits: {
max_model_memory_limit: '30mb',
effective_max_model_memory_limit: '40mb',
},
};

Expand Down Expand Up @@ -211,6 +212,30 @@ describe('ML - validateModelMemoryLimit', () => {
});
});

it('Called with no duration or split and mml above limit, no max setting', async () => {
  const job = getJobConfig();
  // @ts-ignore
  job.analysis_limits.model_memory_limit = '31mb';

  // no duration supplied; no messages expected
  const messages = await validateModelMemoryLimit(getMockCallWithRequest(), job, undefined);
  expect(messages.map(message => message.id)).toEqual([]);
});

it('Called with no duration or split and mml above limit, no max setting, above effective max mml', async () => {
  const job = getJobConfig();
  // @ts-ignore
  job.analysis_limits.model_memory_limit = '41mb';

  // mml exceeds the mocked effective max mml, so a warning id is expected
  const messages = await validateModelMemoryLimit(getMockCallWithRequest(), job, undefined);
  expect(messages.map(message => message.id)).toEqual(['mml_greater_than_effective_max_mml']);
});

it('Called with small number of detectors, so estimated mml is under specified mml, no max setting', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
import { validateJobObject } from './validate_job_object';
import { calculateModelMemoryLimitProvider } from '../calculate_model_memory_limit';
import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';
import { MlInfoResponse } from '../../../common/types/ml_server_info';

// The minimum value the backend expects is 1MByte
const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1048576;
Expand Down Expand Up @@ -50,9 +51,9 @@ export async function validateModelMemoryLimit(

// retrieve the max_model_memory_limit value from the server
// this will be unset unless the user has set this on their cluster
// `effective_max_model_memory_limit` is always reported by ml.info and
// reflects the memory available in the ML nodes.
const info = await callWithRequest<MlInfoResponse>('ml.info');
const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase();
const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase();

if (runCalcModelMemoryTest) {
const { modelMemoryLimit } = await calculateModelMemoryLimitProvider(callWithRequest)(
Expand Down Expand Up @@ -113,17 +114,35 @@ export async function validateModelMemoryLimit(

// if max_model_memory_limit has been set,
// make sure the user defined MML is not greater than it
if (maxModelMemoryLimit !== undefined && mml !== null) {
// @ts-ignore
const maxMmlBytes = numeral(maxModelMemoryLimit).value();
if (mml !== null) {
let maxMmlExceeded = false;
// @ts-ignore
const mmlBytes = numeral(mml).value();
if (mmlBytes > maxMmlBytes) {
messages.push({
id: 'mml_greater_than_max_mml',
maxModelMemoryLimit,
mml,
});

if (maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxMmlBytes = numeral(maxModelMemoryLimit).value();
if (mmlBytes > maxMmlBytes) {
maxMmlExceeded = true;
messages.push({
id: 'mml_greater_than_max_mml',
maxModelMemoryLimit,
mml,
});
}
}

if (effectiveMaxModelMemoryLimit !== undefined && maxMmlExceeded === false) {
// @ts-ignore
const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value();
if (mmlBytes > effectiveMaxMmlBytes) {
messages.push({
id: 'mml_greater_than_effective_max_mml',
maxModelMemoryLimit,
mml,
effectiveMaxModelMemoryLimit,
});
}
}
}

Expand Down