From 21ba2d6ede446cc3d8753f0a6759e12d63a6b3da Mon Sep 17 00:00:00 2001 From: David Roberts Date: Thu, 25 Oct 2018 14:57:38 +0100 Subject: [PATCH] [ML] Include message in field_stats for text log files This change ensures the `message` field is always included in the `field_stats` of the file structure found for semi-structured text log files. Previously it was not, as it will almost certainly contain all distinct values. However, for consistency in the UI it's useful to include it. --- .../TextLogFileStructureFinder.java | 1 + .../TextLogFileStructureFinderTests.java | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java index 7578ca8f7fbfb..591a326128271 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java @@ -89,6 +89,7 @@ static TextLogFileStructureFinder makeTextLogFileStructureFinder(List ex mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "date")); SortedMap fieldStats = new TreeMap<>(); + fieldStats.put("message", FileStructureUtils.calculateFieldStats(sampleMessages, timeoutChecker)); GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings, fieldStats, timeoutChecker); // We can't parse directly into @timestamp using Grok, so parse to some other time field, which the date filter will then remove diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java 
b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java index a848f384e2e5f..de4244cd620a5 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java @@ -7,11 +7,16 @@ import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats; import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure; import org.elasticsearch.xpack.ml.filestructurefinder.TimestampFormatFinder.TimestampMatch; import java.util.Collections; import java.util.Set; +import java.util.stream.Collectors; + +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.not; public class TextLogFileStructureFinderTests extends FileStructureTestCase { @@ -127,6 +132,11 @@ public void testCreateConfigsGivenElasticsearchLog() throws Exception { assertEquals("\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*", structure.getGrokPattern()); assertEquals("timestamp", structure.getTimestampField()); assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats()); + FieldStats messageFieldStats = structure.getFieldStats().get("message"); + assertNotNull(messageFieldStats); + for (String statMessage : messageFieldStats.getTopHits().stream().map(m -> (String) m.get("value")).collect(Collectors.toList())) { + assertThat(structureFinder.getSampleMessages(), hasItem(statMessage)); + } } public void testCreateConfigsGivenElasticsearchLogAndTimestampFieldOverride() throws Exception { @@ -158,6 +168,11 @@ public void testCreateConfigsGivenElasticsearchLogAndTimestampFieldOverride() th assertEquals("\\[%{TIMESTAMP_ISO8601:my_time}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*", structure.getGrokPattern()); 
assertEquals("my_time", structure.getTimestampField()); assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats()); + FieldStats messageFieldStats = structure.getFieldStats().get("message"); + assertNotNull(messageFieldStats); + for (String statMessage : messageFieldStats.getTopHits().stream().map(m -> (String) m.get("value")).collect(Collectors.toList())) { + assertThat(structureFinder.getSampleMessages(), hasItem(statMessage)); + } } public void testCreateConfigsGivenElasticsearchLogAndGrokPatternOverride() throws Exception { @@ -191,6 +206,13 @@ public void testCreateConfigsGivenElasticsearchLogAndGrokPatternOverride() throw "\\[%{JAVACLASS:class} *\\] \\[%{HOSTNAME:node}\\] %{JAVALOGMESSAGE:message}", structure.getGrokPattern()); assertEquals("timestamp", structure.getTimestampField()); assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats()); + FieldStats messageFieldStats = structure.getFieldStats().get("message"); + assertNotNull(messageFieldStats); + for (String statMessage : messageFieldStats.getTopHits().stream().map(m -> (String) m.get("value")).collect(Collectors.toList())) { + // In this case the "message" field was output by the Grok pattern, so "message" + // at the end of the processing will _not_ contain a complete sample message + assertThat(structureFinder.getSampleMessages(), not(hasItem(statMessage))); + } } public void testCreateConfigsGivenElasticsearchLogAndImpossibleGrokPatternOverride() {