Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Improve support for script and aggregation fields in anomaly detection jobs #81923

Merged
merged 30 commits into from
Nov 17, 2020
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
75c4924
[ML] Add support for script fields in wizard and in viz
qn895 Oct 27, 2020
d90f1ec
[ML] Fix datafeed preview for aggregations not showing if name of agg…
qn895 Oct 27, 2020
b580708
[ML] Seems like it can be just aggregations
qn895 Oct 27, 2020
77a7e92
[ML] Fix AE plotting for aggregated fields
qn895 Oct 27, 2020
ecc77ef
[ML] Export Aggregation interface
qn895 Oct 27, 2020
8cd0732
[ML] Fix plotting issue with AE script fields
qn895 Oct 27, 2020
a930e21
[ML] Fix typescript
qn895 Oct 28, 2020
571bf79
Merge remote-tracking branch 'upstream/master' into ml-fix-script-n-a…
qn895 Oct 30, 2020
25e063d
Merge upstream/master into ml-fix-script-n-aggregation-fields
qn895 Nov 3, 2020
0455ba1
[ML] Clean up validation for nested aggregation field
qn895 Nov 4, 2020
4a557f5
[ML] Add validation for missing summary count if datafeed has aggrega…
qn895 Nov 4, 2020
2712c6f
[ML] Fix anomaly search broken because max results is 0
qn895 Nov 4, 2020
f76039b
[ML] Add missing_summary_count_field_name validation to validate.ts test
qn895 Nov 4, 2020
85e1773
[ML] Fix order between datafeedConfig and allowMMLGreaterThanMax
qn895 Nov 4, 2020
79f22dd
[ML] Fix jest tests
qn895 Nov 4, 2020
739a423
[ML] Fix calculateModelMemoryLimitProvider order
qn895 Nov 4, 2020
dfd070a
Merge remote-tracking branch 'upstream/master' into ml-fix-script-n-a…
qn895 Nov 5, 2020
0ced887
Merge remote-tracking branch 'upstream/master' into ml-fix-script-n-a…
qn895 Nov 5, 2020
d96a119
[ML] Rename cardinalityField, combine if statements, add missing_summ…
qn895 Nov 5, 2020
a3a0809
[ML] Fix character escape & disable next step if missing summaryCount…
qn895 Nov 5, 2020
9769d94
[ML] Fix allowMMLGreaterThanMax changed to undefined previously
qn895 Nov 5, 2020
f54c8e8
[ML] Update datafeedAggregations check
qn895 Nov 5, 2020
7355559
[ML] Change DatafeedOverride to Datafeed
qn895 Nov 5, 2020
1b5898f
[ML] Fix field in aggregatable check
qn895 Nov 6, 2020
20c1f67
Merge remote-tracking branch 'upstream/master' into ml-fix-script-n-a…
qn895 Nov 9, 2020
1a3c40f
Merge upstream/master into ml-fix-script-n-aggregation-fields
qn895 Nov 11, 2020
22f938c
[ML] Update text description
qn895 Nov 11, 2020
cf5d4d1
[ML] Update text translations
qn895 Nov 11, 2020
6d50e8e
Merge branch 'master' into ml-fix-script-n-aggregation-fields
kibanamachine Nov 17, 2020
a661a27
Merge branch 'master' into ml-fix-script-n-aggregation-fields
kibanamachine Nov 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions x-pack/plugins/ml/common/constants/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,16 @@ export const getMessages = once(() => {
url:
'https://www.elastic.co/guide/en/elasticsearch/reference/{{version}}/ml-job-resource.html#ml-job-resource',
},
missing_summary_count_field_name: {
status: VALIDATION_STATUS.ERROR,
text: i18n.translate(
'xpack.ml.models.jobValidation.messages.missingSummaryCountFieldNameMessage',
{
defaultMessage:
'A job configured with a datafeed with aggregations must set summary_count_field_name; use doc_count or suitable alternative.',
}
),
},
skipped_extended_tests: {
status: VALIDATION_STATUS.WARNING,
text: i18n.translate('xpack.ml.models.jobValidation.messages.skippedExtendedTestsMessage', {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export interface Datafeed {
job_id: JobId;
query: object;
query_delay?: string;
script_fields?: object;
script_fields?: Record<string, any>;
scroll_size?: number;
delayed_data_check_config?: object;
indices_options?: IndicesOptions;
Expand All @@ -30,16 +30,17 @@ export interface ChunkingConfig {
time_span?: string;
}

interface Aggregation {
buckets: {
export type Aggregation = Record<
string,
{
date_histogram: {
field: string;
fixed_interval: string;
};
aggregations?: { [key: string]: any };
aggs?: { [key: string]: any };
};
}
}
>;

interface IndicesOptions {
expand_wildcards?: 'all' | 'open' | 'closed' | 'hidden' | 'none';
Expand Down
13 changes: 13 additions & 0 deletions x-pack/plugins/ml/common/types/fields.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,16 @@ export const mlCategory: Field = {
type: ES_FIELD_TYPES.KEYWORD,
aggregatable: false,
};

/**
 * Field-based variant of the `cardinality` value accepted by `AggCardinality`.
 */
export interface FieldAggCardinality {
// Name of the field the cardinality definition refers to.
field: string;
// NOTE(review): the purpose of `percent` is not visible in this file — confirm against callers.
percent?: any;
}

/**
 * Script-based variant of the `cardinality` value accepted by `AggCardinality`.
 */
export interface ScriptAggCardinality {
// Untyped script definition; presumably an Elasticsearch script object — confirm against callers.
script: any;
}

/**
 * Wrapper object whose `cardinality` key holds either a field-based
 * (`FieldAggCardinality`) or a script-based (`ScriptAggCardinality`) definition.
 */
export interface AggCardinality {
cardinality: FieldAggCardinality | ScriptAggCardinality;
}
22 changes: 22 additions & 0 deletions x-pack/plugins/ml/common/util/datafeed_utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

import { Aggregation, Datafeed } from '../types/anomaly_detection_jobs';

/**
 * Picks the aggregations definition out of a (possibly partial or missing) datafeed config.
 *
 * The `aggregations` key takes precedence over `aggs`; a key only counts as
 * present when its value is not `undefined`.
 *
 * @param datafeedConfig - datafeed configuration, or `undefined` when none is available
 * @returns the value of `aggregations`, otherwise the value of `aggs`, otherwise `undefined`
 */
export const getDatafeedAggregations = (
  datafeedConfig: Partial<Datafeed> | undefined
): Aggregation | undefined => {
  const preferred = datafeedConfig?.aggregations;
  if (preferred !== undefined) {
    return preferred;
  }
  // Fall back to the `aggs` key; yields undefined when it is absent too.
  return datafeedConfig?.aggs;
};

/**
 * Returns the name of the first key of an aggregations object.
 *
 * @param aggregations - value expected to be an aggregations object; any other
 *   value (including `null`, primitives and `undefined`) is tolerated
 * @returns the first own enumerable key, or `undefined` when the input is not
 *   a non-null object or has no keys
 */
export const getAggregationBucketsName = (aggregations: any): string | undefined => {
  // `typeof null` is 'object', so null has to be rejected explicitly;
  // otherwise Object.keys(null) throws a TypeError.
  if (aggregations === null || typeof aggregations !== 'object') {
    return undefined;
  }
  const keys = Object.keys(aggregations);
  return keys.length > 0 ? keys[0] : undefined;
};
6 changes: 3 additions & 3 deletions x-pack/plugins/ml/common/util/job_utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,8 @@ describe('ML - job utils', () => {
expect(isTimeSeriesViewDetector(job, 3)).toBe(false);
});

test('returns false for a detector using a script field as a metric field_name', () => {
expect(isTimeSeriesViewDetector(job, 4)).toBe(false);
test('returns true for a detector using a script field as a metric field_name', () => {
expect(isTimeSeriesViewDetector(job, 4)).toBe(true);
});
});

Expand Down Expand Up @@ -281,6 +281,7 @@ describe('ML - job utils', () => {
expect(isSourceDataChartableForDetector(job, 22)).toBe(true);
expect(isSourceDataChartableForDetector(job, 23)).toBe(true);
expect(isSourceDataChartableForDetector(job, 24)).toBe(true);
expect(isSourceDataChartableForDetector(job, 37)).toBe(true);
});

test('returns false for expected detectors', () => {
Expand All @@ -296,7 +297,6 @@ describe('ML - job utils', () => {
expect(isSourceDataChartableForDetector(job, 34)).toBe(false);
expect(isSourceDataChartableForDetector(job, 35)).toBe(false);
expect(isSourceDataChartableForDetector(job, 36)).toBe(false);
expect(isSourceDataChartableForDetector(job, 37)).toBe(false);
});
});

Expand Down
23 changes: 22 additions & 1 deletion x-pack/plugins/ml/common/util/job_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import { MlServerLimits } from '../types/ml_server_info';
import { JobValidationMessage, JobValidationMessageId } from '../constants/messages';
import { ES_AGGREGATION, ML_JOB_AGGREGATION } from '../constants/aggregation_types';
import { MLCATEGORY } from '../constants/field_types';
import { getDatafeedAggregations } from './datafeed_utils';

export interface ValidationResults {
valid: boolean;
Expand Down Expand Up @@ -94,7 +95,6 @@ export function isSourceDataChartableForDetector(job: CombinedJob, detectorIndex
// Perform extra check to see if the detector is using a scripted field.
const scriptFields = Object.keys(job.datafeed_config.script_fields);
isSourceDataChartable =
scriptFields.indexOf(dtr.field_name!) === -1 &&
scriptFields.indexOf(dtr.partition_field_name!) === -1 &&
scriptFields.indexOf(dtr.by_field_name!) === -1 &&
scriptFields.indexOf(dtr.over_field_name!) === -1;
Expand Down Expand Up @@ -559,6 +559,27 @@ export function basicDatafeedValidation(datafeed: Datafeed): ValidationResults {
};
}

/**
 * Runs the validation checks that need both the job and its datafeed.
 *
 * Currently a single rule: when the datafeed defines aggregations, the job's
 * `analysis_config.summary_count_field_name` must be set; otherwise the
 * `missing_summary_count_field_name` message is reported and the result is invalid.
 * When either argument is falsy no check is performed and the result is valid.
 *
 * @param job - job configuration to validate
 * @param datafeed - datafeed configuration paired with the job
 * @returns collected messages, the overall `valid` flag and `contains` / `find` lookups by message id
 */
export function basicJobAndDatafeedValidation(job: Job, datafeed: Datafeed): ValidationResults {
  const messages: ValidationResults['messages'] = [];

  // Short-circuit order matters: the datafeed is only inspected when both
  // configs are present, and the job only when aggregations exist.
  const missingSummaryCountField =
    Boolean(datafeed && job) &&
    getDatafeedAggregations(datafeed) !== undefined &&
    !job.analysis_config?.summary_count_field_name;

  if (missingSummaryCountField) {
    messages.push({ id: 'missing_summary_count_field_name' });
  }

  return {
    messages,
    valid: !missingSummaryCountField,
    contains: (id) => messages.some((message) => message.id === id),
    find: (id) => messages.find((message) => message.id === id),
  };
}

export function validateModelMemoryLimit(job: Job, limits: MlServerLimits): ValidationResults {
const messages: ValidationResults['messages'] = [];
let valid = true;
Expand Down
19 changes: 19 additions & 0 deletions x-pack/plugins/ml/common/util/validation_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,22 @@ export function isValidJson(json: string) {
return false;
}
}

/**
 * Depth-first search for a key named `fieldName` anywhere inside a nested
 * aggregations object.
 *
 * Keys are visited in `Object.keys` order. A key equal to `fieldName` at the
 * current level ends the search at that level with its value; otherwise
 * object-valued entries are searched recursively and the first defined result wins.
 *
 * @param aggs - (possibly nested) aggregations object to search
 * @param fieldName - key to look for
 * @returns the value stored under the first matching key, or `undefined` when
 *   no key matches
 */
export function findAggField(aggs: Record<string, any>, fieldName: string): any {
  // `typeof null` is 'object'; guard so neither the top-level call nor a
  // recursive call ever passes null/primitives to Object.keys.
  if (aggs === null || typeof aggs !== 'object') {
    return undefined;
  }

  for (const key of Object.keys(aggs)) {
    const candidate = aggs[key];
    if (key === fieldName) {
      return candidate;
    }
    if (candidate !== null && typeof candidate === 'object') {
      const nestedMatch = findAggField(candidate, fieldName);
      if (nestedMatch !== undefined) {
        return nestedMatch;
      }
    }
  }
  return undefined;
}

/**
 * Tells whether `fieldName` occurs as a key somewhere inside `aggs`.
 *
 * @param aggs - (possibly nested) aggregations object to search
 * @param fieldName - key to look for
 * @returns `true` when `findAggField` yields anything other than `undefined`
 */
export function isValidAggregationField(aggs: Record<string, any>, fieldName: string): boolean {
  const match = findAggField(aggs, fieldName);
  return match !== undefined;
}
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ export const anomalyDataChange = function (
config.timeField,
range.min,
range.max,
bucketSpanSeconds * 1000
bucketSpanSeconds * 1000,
config.datafeedConfig
)
.toPromise();
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ export const useModelMemoryEstimator = (
// Update model memory estimation payload on the job creator updates
useEffect(() => {
modelMemoryEstimator.update({
datafeedConfig: jobCreator.datafeedConfig,
analysisConfig: jobCreator.jobConfig.analysis_config,
indexPattern: jobCreator.indexPatternTitle,
query: jobCreator.datafeedConfig.query,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { map, startWith, tap } from 'rxjs/operators';
import {
basicJobValidation,
basicDatafeedValidation,
basicJobAndDatafeedValidation,
} from '../../../../../../common/util/job_utils';
import { getNewJobLimits } from '../../../../services/ml_server_info';
import { JobCreator, JobCreatorType, isCategorizationJobCreator } from '../job_creator';
Expand Down Expand Up @@ -53,6 +54,7 @@ export interface BasicValidations {
scrollSize: Validation;
categorizerMissingPerPartition: Validation;
categorizerVaryingPerPartitionField: Validation;
summaryCountField: Validation;
}

export interface AdvancedValidations {
Expand Down Expand Up @@ -80,6 +82,7 @@ export class JobValidator {
scrollSize: { valid: true },
categorizerMissingPerPartition: { valid: true },
categorizerVaryingPerPartitionField: { valid: true },
summaryCountField: { valid: true },
};
private _advancedValidations: AdvancedValidations = {
categorizationFieldValid: { valid: true },
Expand Down Expand Up @@ -197,6 +200,14 @@ export class JobValidator {
datafeedConfig
);

const basicJobAndDatafeedResults = basicJobAndDatafeedValidation(jobConfig, datafeedConfig);
populateValidationMessages(
basicJobAndDatafeedResults,
this._basicValidations,
jobConfig,
datafeedConfig
);

// run additional job and group id validation
const idResults = checkForExistingJobAndGroupIds(
this._jobCreator.jobId,
Expand Down Expand Up @@ -228,6 +239,9 @@ export class JobValidator {
public get bucketSpan(): Validation {
return this._basicValidations.bucketSpan;
}
public get summaryCountField(): Validation {
return this._basicValidations.summaryCountField;
}

public get duplicateDetectors(): Validation {
return this._basicValidations.duplicateDetectors;
Expand Down Expand Up @@ -297,6 +311,7 @@ export class JobValidator {
this.duplicateDetectors.valid &&
this.categorizerMissingPerPartition.valid &&
this.categorizerVaryingPerPartitionField.valid &&
this.summaryCountField.valid &&
!this.validating &&
(this._jobCreator.type !== JOB_TYPE.CATEGORIZATION ||
(this._jobCreator.type === JOB_TYPE.CATEGORIZATION && this.categorizationField))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,15 @@ export function populateValidationMessages(
basicValidations.frequency.valid = false;
basicValidations.frequency.message = invalidTimeIntervalMessage(datafeedConfig.frequency);
}
if (validationResults.contains('missing_summary_count_field_name')) {
basicValidations.summaryCountField.valid = false;
basicValidations.summaryCountField.message = i18n.translate(
'xpack.ml.newJob.wizard.validateJob.summaryCountFieldMissing',
{
defaultMessage: 'Required field as the datafeed uses aggregations.',
}
);
}
}

export function checkForExistingJobAndGroupIds(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,12 @@ export const DatafeedPreview: FC<{
if (combinedJob.datafeed_config && combinedJob.datafeed_config.indices.length) {
try {
const resp = await mlJobService.searchPreview(combinedJob);
const data = resp.aggregations
? resp.aggregations.buckets.buckets.slice(0, ML_DATA_PREVIEW_COUNT)
: resp.hits.hits;
let data = resp.hits.hits;
// the first item under aggregations can be any name
if (typeof resp.aggregations === 'object' && Object.keys(resp.aggregations).length > 0) {
const accessor = Object.keys(resp.aggregations)[0];
data = resp.aggregations[accessor].buckets.slice(0, ML_DATA_PREVIEW_COUNT);
}

setPreviewJsonString(JSON.stringify(data, null, 2));
} catch (error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,44 @@
import React, { memo, FC } from 'react';
import { i18n } from '@kbn/i18n';
import { FormattedMessage } from '@kbn/i18n/react';
import { EuiDescribedFormGroup, EuiFormRow } from '@elastic/eui';
import { EuiDescribedFormGroup, EuiFormRow, EuiLink } from '@elastic/eui';
import { Validation } from '../../../../../common/job_validator';
import { useMlKibana } from '../../../../../../../contexts/kibana';

export const Description: FC = memo(({ children }) => {
interface Props {
validation: Validation;
}

export const Description: FC<Props> = memo(({ children, validation }) => {
const title = i18n.translate('xpack.ml.newJob.wizard.pickFieldsStep.summaryCountField.title', {
defaultMessage: 'Summary count field',
});
const {
services: { docLinks },
} = useMlKibana();
const { ELASTIC_WEBSITE_URL, DOC_LINK_VERSION } = docLinks;
const docsUrl = `${ELASTIC_WEBSITE_URL}guide/en/machine-learning/${DOC_LINK_VERSION}/ml-configuring-aggregation.html`;
return (
<EuiDescribedFormGroup
title={<h3>{title}</h3>}
description={
<FormattedMessage
id="xpack.ml.newJob.wizard.pickFieldsStep.summaryCountField.description"
defaultMessage="Optional, for use if input data has been pre-summarized e.g. \{docCountParam\}."
defaultMessage="If the input data is {aggregated}, specify the field that contains the document count."
values={{
aggregated: (
<EuiLink href={docsUrl} target="_blank">
<FormattedMessage
id="xpack.ml.newJob.wizard.pickFieldsStep.summaryCountField.aggregatedText"
defaultMessage="aggregated"
/>
</EuiLink>
),
}}
/>
}
>
<EuiFormRow label={title}>
<EuiFormRow label={title} error={validation.message} isInvalid={validation.valid === false}>
<>{children}</>
</EuiFormRow>
</EuiDescribedFormGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,23 @@ import {
import { Description } from './description';

export const SummaryCountField: FC = () => {
const { jobCreator: jc, jobCreatorUpdate, jobCreatorUpdated } = useContext(JobCreatorContext);
const {
jobCreator: jc,
jobCreatorUpdate,
jobCreatorUpdated,
jobValidator,
jobValidatorUpdated,
} = useContext(JobCreatorContext);

const jobCreator = jc as MultiMetricJobCreator | PopulationJobCreator | AdvancedJobCreator;
const { fields } = newJobCapsService;
const [summaryCountFieldName, setSummaryCountFieldName] = useState(
jobCreator.summaryCountFieldName
);
const [validation, setValidation] = useState(jobValidator.summaryCountField);
useEffect(() => {
setValidation(jobValidator.summaryCountField);
}, [jobValidatorUpdated]);

useEffect(() => {
jobCreator.summaryCountFieldName = summaryCountFieldName;
Expand All @@ -35,7 +45,7 @@ export const SummaryCountField: FC = () => {
}, [jobCreatorUpdated]);

return (
<Description>
<Description validation={validation}>
<SummaryCountFieldSelect
fields={fields}
changeHandler={setSummaryCountFieldName}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -628,13 +628,15 @@ export function mlApiServicesProvider(httpService: HttpService) {
},

calculateModelMemoryLimit$({
datafeedConfig,
analysisConfig,
indexPattern,
query,
timeFieldName,
earliestMs,
latestMs,
}: {
datafeedConfig?: Datafeed;
analysisConfig: AnalysisConfig;
indexPattern: string;
query: any;
Expand All @@ -643,6 +645,7 @@ export function mlApiServicesProvider(httpService: HttpService) {
latestMs: number;
}) {
const body = JSON.stringify({
datafeedConfig,
analysisConfig,
indexPattern,
query,
Expand Down
Loading