Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Show warning when the model memory limit is higher than the memory available in the ML node #65652

Merged
merged 8 commits into from
May 8, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions x-pack/plugins/ml/common/types/ml_server_info.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export interface MlServerDefaults {

export interface MlServerLimits {
  // Hard upper bound on job model memory limit; only present when the
  // administrator has configured it on the cluster.
  max_model_memory_limit?: string;
  // Largest model memory limit a job could actually run with, based on the
  // memory available in the ML nodes (reported by the ml.info endpoint).
  effective_max_model_memory_limit?: string;
}

export interface MlInfoResponse {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { APICaller } from 'kibana/server';
import { MLCATEGORY } from '../../../common/constants/field_types';
import { AnalysisConfig } from '../../../common/types/anomaly_detection_jobs';
import { fieldsServiceProvider } from '../fields_service';
import { MlInfoResponse } from '../../../common/types/ml_server_info';

interface ModelMemoryEstimationResult {
/**
Expand Down Expand Up @@ -139,15 +140,9 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller)
latestMs: number,
allowMMLGreaterThanMax = false
): Promise<ModelMemoryEstimationResult> {
// Fetch the server-side memory limits from ml.info.
// `max_model_memory_limit` is unset unless the administrator has configured
// it; `effective_max_model_memory_limit` is the largest MML the current
// ML nodes can accommodate.
const info = await callAsCurrentUser<MlInfoResponse>('ml.info');
const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase();
const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase();

const { overallCardinality, maxBucketCardinality } = await getCardinalities(
analysisConfig,
Expand All @@ -168,24 +163,40 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller)
})
).model_memory_estimate.toUpperCase();

let modelMemoryLimit: string = estimatedModelMemoryLimit;
let modelMemoryLimit = estimatedModelMemoryLimit;
let mmlCappedAtMax = false;
// if max_model_memory_limit has been set,
// make sure the estimated value is not greater than it.
if (!allowMMLGreaterThanMax && maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxBytes = numeral(maxModelMemoryLimit).value();
if (allowMMLGreaterThanMax === false) {
// @ts-ignore
const mmlBytes = numeral(estimatedModelMemoryLimit).value();
if (mmlBytes > maxBytes) {
if (maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxBytes = numeral(maxModelMemoryLimit).value();
if (mmlBytes > maxBytes) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
mmlCappedAtMax = true;
}
}

// if we've not already capped the estimated mml at the hard max server setting
// ensure that the estimated mml isn't greater than the effective max mml
if (mmlCappedAtMax === false && effectiveMaxModelMemoryLimit !== undefined) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value();
if (mmlBytes > effectiveMaxMmlBytes) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(effectiveMaxMmlBytes / numeral('1MB').value())}MB`;
}
}
}

return {
estimatedModelMemoryLimit,
modelMemoryLimit,
...(maxModelMemoryLimit ? { maxModelMemoryLimit } : {}),
...(effectiveMaxModelMemoryLimit ? { effectiveMaxModelMemoryLimit } : {}),
};
};
}
17 changes: 14 additions & 3 deletions x-pack/plugins/ml/server/models/job_validation/messages.js
Original file line number Diff line number Diff line change
Expand Up @@ -433,13 +433,24 @@ export const getMessages = () => {
}
),
},
mml_greater_than_max_mml: {
status: 'ERROR',
mml_greater_than_effective_max_mml: {
status: 'WARNING',
text: i18n.translate('xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage', {
defaultMessage:
'The model memory limit is greater than the max model memory limit configured for this cluster.',
'Job will not be able to run in the current cluster because model memory limit is higher than {effectiveMaxModelMemoryLimit}.',
values: { effectiveMaxModelMemoryLimit: '{{effectiveMaxModelMemoryLimit}}' },
}),
},
mml_greater_than_max_mml: {
status: 'ERROR',
text: i18n.translate(
'xpack.ml.models.jobValidation.messages.mmlGreaterThanEffectiveMaxMmlMessage',
{
defaultMessage:
'The model memory limit is greater than the max model memory limit configured for this cluster.',
}
),
},
mml_value_invalid: {
status: 'ERROR',
text: i18n.translate('xpack.ml.models.jobValidation.messages.mmlValueInvalidMessage', {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ describe('ML - validateModelMemoryLimit', () => {
},
limits: {
max_model_memory_limit: '30mb',
effective_max_model_memory_limit: '40mb',
},
};

Expand Down Expand Up @@ -211,6 +212,30 @@ describe('ML - validateModelMemoryLimit', () => {
});
});

it('Called with no duration or split and mml above limit, no max setting', async () => {
  const job = getJobConfig();
  // @ts-ignore
  job.analysis_limits.model_memory_limit = '31mb';

  // no duration supplied; no messages expected
  const messages = await validateModelMemoryLimit(getMockCallWithRequest(), job, undefined);
  expect(messages.map(message => message.id)).toEqual([]);
});

it('Called with no duration or split and mml above limit, no max setting, above effective max mml', async () => {
  const job = getJobConfig();
  // @ts-ignore
  job.analysis_limits.model_memory_limit = '41mb';

  // mml exceeds the mocked effective max mml, so a warning id is expected
  const messages = await validateModelMemoryLimit(getMockCallWithRequest(), job, undefined);
  expect(messages.map(message => message.id)).toEqual(['mml_greater_than_effective_max_mml']);
});

it('Called with small number of detectors, so estimated mml is under specified mml, no max setting', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
import { validateJobObject } from './validate_job_object';
import { calculateModelMemoryLimitProvider } from '../calculate_model_memory_limit';
import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';
import { MlInfoResponse } from '../../../common/types/ml_server_info';

// The minimum value the backend expects is 1MByte
const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1048576;
Expand Down Expand Up @@ -50,9 +51,9 @@ export async function validateModelMemoryLimit(

// retrieve the max_model_memory_limit value from the server
// this will be unset unless the user has set this on their cluster
// `effective_max_model_memory_limit` is always reported by ml.info and
// reflects the memory available in the ML nodes.
const info = await callWithRequest<MlInfoResponse>('ml.info');
const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase();
const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase();

if (runCalcModelMemoryTest) {
const { modelMemoryLimit } = await calculateModelMemoryLimitProvider(callWithRequest)(
Expand Down Expand Up @@ -113,17 +114,35 @@ export async function validateModelMemoryLimit(

// if max_model_memory_limit has been set,
// make sure the user defined MML is not greater than it
if (maxModelMemoryLimit !== undefined && mml !== null) {
// @ts-ignore
const maxMmlBytes = numeral(maxModelMemoryLimit).value();
if (mml !== null) {
let maxMmlExceeded = false;
// @ts-ignore
const mmlBytes = numeral(mml).value();
if (mmlBytes > maxMmlBytes) {
messages.push({
id: 'mml_greater_than_max_mml',
maxModelMemoryLimit,
mml,
});

if (maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxMmlBytes = numeral(maxModelMemoryLimit).value();
if (mmlBytes > maxMmlBytes) {
maxMmlExceeded = true;
messages.push({
id: 'mml_greater_than_max_mml',
maxModelMemoryLimit,
mml,
});
}
}

if (effectiveMaxModelMemoryLimit !== undefined && maxMmlExceeded === false) {
// @ts-ignore
const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value();
if (mmlBytes > effectiveMaxMmlBytes) {
messages.push({
id: 'mml_greater_than_effective_max_mml',
maxModelMemoryLimit,
mml,
effectiveMaxModelMemoryLimit,
});
}
}
}

Expand Down