From e39d396e025050968d71f577010040905a354380 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 14 Mar 2018 16:05:26 +0000 Subject: [PATCH 01/12] Store offsets in index prefix fields when stored in the parent field The index prefix field is normally indexed as docs-only, given that it cannot be used in phrases. However, in the case that the parent field has been indexed with offsets, or has term-vector offsets, we should also store this in the index prefix field for highlighting. Note that this commit does not implement highlighting on prefix fields, but rather ensures that future work can implement this without a backwards-break in index data. Closes #28994 --- .../index/mapper/TextFieldMapper.java | 18 ++++- .../index/mapper/TextFieldMapperTests.java | 75 +++++++++++++++++++ 2 files changed, 89 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 799698ac776d7..e2f8eb4e64f63 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; @@ -152,11 +153,20 @@ public TextFieldMapper build(BuilderContext context) { fieldType.setSearchQuoteAnalyzer(new NamedAnalyzer(fieldType.searchQuoteAnalyzer(), positionIncrementGap)); } setupFieldType(context); - if (prefixFieldType != null && fieldType().isSearchable() == false) { - throw new IllegalArgumentException("Cannot set index_prefix on unindexed field [" + name() + "]"); + PrefixFieldMapper prefixMapper = null; + if (prefixFieldType != 
null) { + if (fieldType().isSearchable() == false) { + throw new IllegalArgumentException("Cannot set index_prefix on unindexed field [" + name() + "]"); + } + if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { + prefixFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + } + if (fieldType.storeTermVectorOffsets()) { + prefixFieldType.setStoreTermVectorOffsets(true); + } + prefixFieldType.setAnalyzer(fieldType.indexAnalyzer()); + prefixMapper = new PrefixFieldMapper(prefixFieldType, context.indexSettings()); } - PrefixFieldMapper prefixMapper = prefixFieldType == null ? null - : new PrefixFieldMapper(prefixFieldType.setAnalyzer(fieldType.indexAnalyzer()), context.indexSettings()); return new TextFieldMapper( name, fieldType, defaultFieldType, positionIncrementGap, prefixMapper, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index d96c8f6ed742f..c15eeebbaa4d6 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; @@ -594,6 +595,80 @@ public void testEmptyName() throws IOException { assertThat(e.getMessage(), containsString("name cannot be empty string")); } + public void testIndexPrefixIndexTypes() throws IOException { + QueryShardContext queryShardContext = indexService.newQueryShardContext( + randomInt(20), null, () -> { + throw new UnsupportedOperationException(); + }, null); + + { + String mapping = 
XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties").startObject("field") + .field("type", "text") + .field("analyzer", "english") + .startObject("index_prefix").endObject() + .field("index_options", "offsets") + .endObject().endObject().endObject().endObject().string(); + + DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping)); + + FieldMapper prefix = mapper.mappers().getMapper("field._index_prefix"); + FieldType ft = prefix.fieldType; + assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, ft.indexOptions()); + } + + { + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties").startObject("field") + .field("type", "text") + .field("analyzer", "english") + .startObject("index_prefix").endObject() + .field("index_options", "positions") + .endObject().endObject().endObject().endObject().string(); + + DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping)); + + FieldMapper prefix = mapper.mappers().getMapper("field._index_prefix"); + FieldType ft = prefix.fieldType; + assertEquals(IndexOptions.DOCS, ft.indexOptions()); + assertFalse(ft.storeTermVectors()); + } + + { + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties").startObject("field") + .field("type", "text") + .field("analyzer", "english") + .startObject("index_prefix").endObject() + .field("term_vector", "with_positions_offsets") + .endObject().endObject().endObject().endObject().string(); + + DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping)); + + FieldMapper prefix = mapper.mappers().getMapper("field._index_prefix"); + FieldType ft = prefix.fieldType; + assertEquals(IndexOptions.DOCS, ft.indexOptions()); + assertTrue(ft.storeTermVectorOffsets()); + } + + { + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + 
.startObject("properties").startObject("field") + .field("type", "text") + .field("analyzer", "english") + .startObject("index_prefix").endObject() + .field("term_vector", "with_positions") + .endObject().endObject().endObject().endObject().string(); + + DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping)); + + FieldMapper prefix = mapper.mappers().getMapper("field._index_prefix"); + FieldType ft = prefix.fieldType; + assertEquals(IndexOptions.DOCS, ft.indexOptions()); + assertFalse(ft.storeTermVectorOffsets()); + } + } + public void testIndexPrefixMapping() throws IOException { QueryShardContext queryShardContext = indexService.newQueryShardContext( From 4cea30093f3eaae03169c5eb0893b1d7e0071038 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 24 Apr 2018 11:50:16 +0100 Subject: [PATCH 02/12] Upgrade to lucene 7.4.0 snapshot --- buildSrc/version.properties | 2 +- docs/Versions.asciidoc | 4 ++-- .../licenses/lucene-expressions-7.3.0.jar.sha1 | 1 - .../lucene-expressions-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + .../licenses/lucene-analyzers-icu-7.3.0.jar.sha1 | 1 - ...ucene-analyzers-icu-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + .../elasticsearch/index/analysis/IcuTokenizerFactory.java | 8 ++++---- .../licenses/lucene-analyzers-kuromoji-7.3.0.jar.sha1 | 1 - ...-analyzers-kuromoji-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + .../licenses/lucene-analyzers-phonetic-7.3.0.jar.sha1 | 1 - ...-analyzers-phonetic-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + .../licenses/lucene-analyzers-smartcn-7.3.0.jar.sha1 | 1 - ...e-analyzers-smartcn-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + .../licenses/lucene-analyzers-stempel-7.3.0.jar.sha1 | 1 - ...e-analyzers-stempel-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + .../licenses/lucene-analyzers-morfologik-7.3.0.jar.sha1 | 1 - ...nalyzers-morfologik-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-analyzers-common-7.3.0.jar.sha1 | 1 - ...ne-analyzers-common-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + 
server/licenses/lucene-backward-codecs-7.3.0.jar.sha1 | 1 - ...ene-backward-codecs-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-core-7.3.0.jar.sha1 | 1 - .../lucene-core-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-grouping-7.3.0.jar.sha1 | 1 - .../lucene-grouping-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-highlighter-7.3.0.jar.sha1 | 1 - .../lucene-highlighter-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-join-7.3.0.jar.sha1 | 1 - .../lucene-join-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-memory-7.3.0.jar.sha1 | 1 - .../lucene-memory-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-misc-7.3.0.jar.sha1 | 1 - .../lucene-misc-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-queries-7.3.0.jar.sha1 | 1 - .../lucene-queries-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-queryparser-7.3.0.jar.sha1 | 1 - .../lucene-queryparser-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-sandbox-7.3.0.jar.sha1 | 1 - .../lucene-sandbox-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-spatial-7.3.0.jar.sha1 | 1 - .../lucene-spatial-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-spatial-extras-7.3.0.jar.sha1 | 1 - ...cene-spatial-extras-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-spatial3d-7.3.0.jar.sha1 | 1 - .../lucene-spatial3d-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/licenses/lucene-suggest-7.3.0.jar.sha1 | 1 - .../lucene-suggest-7.4.0-snapshot-330fd18f20.jar.sha1 | 1 + server/src/main/java/org/elasticsearch/Version.java | 2 +- 48 files changed, 30 insertions(+), 30 deletions(-) delete mode 100644 modules/lang-expression/licenses/lucene-expressions-7.3.0.jar.sha1 create mode 100644 modules/lang-expression/licenses/lucene-expressions-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 plugins/analysis-icu/licenses/lucene-analyzers-icu-7.3.0.jar.sha1 create mode 100644 
plugins/analysis-icu/licenses/lucene-analyzers-icu-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-7.3.0.jar.sha1 create mode 100644 plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-7.3.0.jar.sha1 create mode 100644 plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-7.3.0.jar.sha1 create mode 100644 plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 plugins/analysis-stempel/licenses/lucene-analyzers-stempel-7.3.0.jar.sha1 create mode 100644 plugins/analysis-stempel/licenses/lucene-analyzers-stempel-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-7.3.0.jar.sha1 create mode 100644 plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-analyzers-common-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-analyzers-common-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-backward-codecs-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-backward-codecs-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-core-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-core-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-grouping-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-grouping-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-highlighter-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-highlighter-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-join-7.3.0.jar.sha1 create mode 
100644 server/licenses/lucene-join-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-memory-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-memory-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-misc-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-misc-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-queries-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-queries-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-queryparser-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-queryparser-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-sandbox-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-sandbox-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-spatial-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-spatial-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-spatial-extras-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-spatial-extras-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-spatial3d-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-spatial3d-7.4.0-snapshot-330fd18f20.jar.sha1 delete mode 100644 server/licenses/lucene-suggest-7.3.0.jar.sha1 create mode 100644 server/licenses/lucene-suggest-7.4.0-snapshot-330fd18f20.jar.sha1 diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 0f3e6c62c9b75..40800c3c6f835 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -1,5 +1,5 @@ elasticsearch = 7.0.0-alpha1 -lucene = 7.3.0 +lucene = 7.4.0-snapshot-330fd18f20 # optional dependencies spatial4j = 0.7 diff --git a/docs/Versions.asciidoc b/docs/Versions.asciidoc index 9f7fdc9ea2f17..be02b03c4f4bb 100644 --- a/docs/Versions.asciidoc +++ b/docs/Versions.asciidoc @@ -1,7 +1,7 @@ :version: 7.0.0-alpha1 :major-version: 7.x -:lucene_version: 7.3.0 
-:lucene_version_path: 7_3_0 +:lucene_version: 7.4.0-snapshot-330fd18f20 +:lucene_version_path: 7_4_0 :branch: master :jdk: 1.8.0_131 :jdk_major: 8 diff --git a/modules/lang-expression/licenses/lucene-expressions-7.3.0.jar.sha1 b/modules/lang-expression/licenses/lucene-expressions-7.3.0.jar.sha1 deleted file mode 100644 index 62a094a8b0feb..0000000000000 --- a/modules/lang-expression/licenses/lucene-expressions-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -cb82d9db3043bbd25b4d0eb5022ed1e529c936d3 \ No newline at end of file diff --git a/modules/lang-expression/licenses/lucene-expressions-7.4.0-snapshot-330fd18f20.jar.sha1 b/modules/lang-expression/licenses/lucene-expressions-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..73f4e7c663885 --- /dev/null +++ b/modules/lang-expression/licenses/lucene-expressions-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +e89319d505f0d338e596310375878c0b9c4c1350 \ No newline at end of file diff --git a/plugins/analysis-icu/licenses/lucene-analyzers-icu-7.3.0.jar.sha1 b/plugins/analysis-icu/licenses/lucene-analyzers-icu-7.3.0.jar.sha1 deleted file mode 100644 index de70972e975f0..0000000000000 --- a/plugins/analysis-icu/licenses/lucene-analyzers-icu-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -c09216a18658d5b2912566efff8665e45edc24b4 \ No newline at end of file diff --git a/plugins/analysis-icu/licenses/lucene-analyzers-icu-7.4.0-snapshot-330fd18f20.jar.sha1 b/plugins/analysis-icu/licenses/lucene-analyzers-icu-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..09ee76c7769af --- /dev/null +++ b/plugins/analysis-icu/licenses/lucene-analyzers-icu-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +823eda1d03c8a5ebbfbac2c7f835e31c5aa806da \ No newline at end of file diff --git a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java 
index fa1999cf17e39..2d8226745f8b8 100644 --- a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java +++ b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java @@ -80,7 +80,7 @@ private ICUTokenizerConfig getIcuConfig(Environment env, Settings settings) { if (tailored.isEmpty()) { return null; } else { - final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT]; + final RuleBasedBreakIterator breakers[] = new RuleBasedBreakIterator[UScript.CODE_LIMIT]; for (Map.Entry entry : tailored.entrySet()) { int code = entry.getKey(); String resourcePath = entry.getValue(); @@ -89,9 +89,9 @@ private ICUTokenizerConfig getIcuConfig(Environment env, Settings settings) { // cjkAsWords nor myanmarAsWords are not configurable yet. ICUTokenizerConfig config = new DefaultICUTokenizerConfig(true, true) { @Override - public BreakIterator getBreakIterator(int script) { + public RuleBasedBreakIterator getBreakIterator(int script) { if (breakers[script] != null) { - return (BreakIterator) breakers[script].clone(); + return (RuleBasedBreakIterator) breakers[script].clone(); } else { return super.getBreakIterator(script); } @@ -105,7 +105,7 @@ public BreakIterator getBreakIterator(int script) { } //parse a single RBBi rule file - private BreakIterator parseRules(String filename, Environment env) throws IOException { + private RuleBasedBreakIterator parseRules(String filename, Environment env) throws IOException { final Path path = env.configFile().resolve(filename); String rules = Files.readAllLines(path) diff --git a/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-7.3.0.jar.sha1 b/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-7.3.0.jar.sha1 deleted file mode 100644 index 40ff3efe2642c..0000000000000 --- a/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -c9d5bbd0affa90b46e173c762c35419a54977c35 \ No newline at end 
of file diff --git a/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-7.4.0-snapshot-330fd18f20.jar.sha1 b/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..5840784d16d9d --- /dev/null +++ b/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +15b5f08fc0e3a6cfe8f0a35b845d1a8ae38d5064 \ No newline at end of file diff --git a/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-7.3.0.jar.sha1 b/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-7.3.0.jar.sha1 deleted file mode 100644 index 9442635addda9..0000000000000 --- a/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4e6c63fa8ae005d81d12f0d88ffa98346b443ac4 \ No newline at end of file diff --git a/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-7.4.0-snapshot-330fd18f20.jar.sha1 b/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..187b790e2d839 --- /dev/null +++ b/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +ac4d8f4f312bf80dbd0e7d178ca1085b439c03a1 \ No newline at end of file diff --git a/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-7.3.0.jar.sha1 b/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-7.3.0.jar.sha1 deleted file mode 100644 index 780824c4d4558..0000000000000 --- a/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -37b7ff0a6493f139cb77f5bda965ac0189c8efd1 \ No newline at end of file diff --git a/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-7.4.0-snapshot-330fd18f20.jar.sha1 b/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 
0000000000000..4f78b54d06330 --- /dev/null +++ b/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +e6f2982ac7133dc6490da37c60eda2f326c6efc4 \ No newline at end of file diff --git a/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-7.3.0.jar.sha1 b/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-7.3.0.jar.sha1 deleted file mode 100644 index ba241e6a09915..0000000000000 --- a/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -d189185da23b2221c4d532da5e2cacce735f8a0c \ No newline at end of file diff --git a/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-7.4.0-snapshot-330fd18f20.jar.sha1 b/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..3e418f83bef30 --- /dev/null +++ b/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +9e3cf104d4b4c95da4d80b6f4100703dba542ee6 \ No newline at end of file diff --git a/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-7.3.0.jar.sha1 b/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-7.3.0.jar.sha1 deleted file mode 100644 index fb7e5befe4774..0000000000000 --- a/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -74462b51de45afe708f1042cc901fe7370413871 \ No newline at end of file diff --git a/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-7.4.0-snapshot-330fd18f20.jar.sha1 b/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..238fa05d4a719 --- /dev/null +++ b/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +60ccdbec015825e33f03871e293e0f43d5dba31b \ No newline at end of file diff --git 
a/server/licenses/lucene-analyzers-common-7.3.0.jar.sha1 b/server/licenses/lucene-analyzers-common-7.3.0.jar.sha1 deleted file mode 100644 index 5a50f9dd77f5e..0000000000000 --- a/server/licenses/lucene-analyzers-common-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4325a5cdf8d3fa23f326cd86a2297fee2bc844f5 \ No newline at end of file diff --git a/server/licenses/lucene-analyzers-common-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-analyzers-common-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..5cab698ddcf71 --- /dev/null +++ b/server/licenses/lucene-analyzers-common-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +32084e08e07487c7e6e29c3ef9fcf2fd5509e935 \ No newline at end of file diff --git a/server/licenses/lucene-backward-codecs-7.3.0.jar.sha1 b/server/licenses/lucene-backward-codecs-7.3.0.jar.sha1 deleted file mode 100644 index 309f301ad8c81..0000000000000 --- a/server/licenses/lucene-backward-codecs-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -3b618a21a924cb35ac1f27d3ca47d9ed04f43588 \ No newline at end of file diff --git a/server/licenses/lucene-backward-codecs-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-backward-codecs-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..d83e2da4ea215 --- /dev/null +++ b/server/licenses/lucene-backward-codecs-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +51fe2748c7126da99142359651ddc88792e94aa3 \ No newline at end of file diff --git a/server/licenses/lucene-core-7.3.0.jar.sha1 b/server/licenses/lucene-core-7.3.0.jar.sha1 deleted file mode 100644 index e12c932b38dd0..0000000000000 --- a/server/licenses/lucene-core-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -040e2de30c5e6bad868b144e371730200719ceb3 \ No newline at end of file diff --git a/server/licenses/lucene-core-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-core-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..f3e4ef9610a21 --- /dev/null +++ 
b/server/licenses/lucene-core-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +e4340eaead4ec2e85b0b80ea20ef2d7cb78efb50 \ No newline at end of file diff --git a/server/licenses/lucene-grouping-7.3.0.jar.sha1 b/server/licenses/lucene-grouping-7.3.0.jar.sha1 deleted file mode 100644 index 703384a64de9a..0000000000000 --- a/server/licenses/lucene-grouping-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -20a5c472a8be9bec7aa40472791389e875b9e1f2 \ No newline at end of file diff --git a/server/licenses/lucene-grouping-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-grouping-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..06f6746e6735e --- /dev/null +++ b/server/licenses/lucene-grouping-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +3b7df4ca3d11347c273c159eb75522994d3d17ea \ No newline at end of file diff --git a/server/licenses/lucene-highlighter-7.3.0.jar.sha1 b/server/licenses/lucene-highlighter-7.3.0.jar.sha1 deleted file mode 100644 index 6e38e2560636f..0000000000000 --- a/server/licenses/lucene-highlighter-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1f92c7d3d9bc2765fe6195bcc4fcb160d11175cc \ No newline at end of file diff --git a/server/licenses/lucene-highlighter-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-highlighter-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..78ca3c492cadc --- /dev/null +++ b/server/licenses/lucene-highlighter-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +4ad0d95b47aade5bddda42736f4fabeac6fc37f5 \ No newline at end of file diff --git a/server/licenses/lucene-join-7.3.0.jar.sha1 b/server/licenses/lucene-join-7.3.0.jar.sha1 deleted file mode 100644 index d7213d76a62aa..0000000000000 --- a/server/licenses/lucene-join-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -da4af75a7e4fe7843fbfa4b58e6a238b6b706d64 \ No newline at end of file diff --git a/server/licenses/lucene-join-7.4.0-snapshot-330fd18f20.jar.sha1 
b/server/licenses/lucene-join-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..aa8c9018b5f54 --- /dev/null +++ b/server/licenses/lucene-join-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +a7bcdbb24099017385263c7a5cde2dc7080486ca \ No newline at end of file diff --git a/server/licenses/lucene-memory-7.3.0.jar.sha1 b/server/licenses/lucene-memory-7.3.0.jar.sha1 deleted file mode 100644 index 6bb4a4d832d54..0000000000000 --- a/server/licenses/lucene-memory-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -fc45b02a5086ec454e6d6ae81fc2cbe7be1c0902 \ No newline at end of file diff --git a/server/licenses/lucene-memory-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-memory-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..01d37d30ac4bc --- /dev/null +++ b/server/licenses/lucene-memory-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +c0bd14ecc3877a9d41f69d5d3c9c62443c976dc6 \ No newline at end of file diff --git a/server/licenses/lucene-misc-7.3.0.jar.sha1 b/server/licenses/lucene-misc-7.3.0.jar.sha1 deleted file mode 100644 index 43c777150a3e1..0000000000000 --- a/server/licenses/lucene-misc-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -b6a2418a94b84c29c4b9fcfe4381f2cc1aa4c214 \ No newline at end of file diff --git a/server/licenses/lucene-misc-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-misc-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..348cf7afbd11a --- /dev/null +++ b/server/licenses/lucene-misc-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +4dc80596cfa9ffb0ba13016b7454dc8b6fab2c1f \ No newline at end of file diff --git a/server/licenses/lucene-queries-7.3.0.jar.sha1 b/server/licenses/lucene-queries-7.3.0.jar.sha1 deleted file mode 100644 index b0ef2b4d0eb84..0000000000000 --- a/server/licenses/lucene-queries-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6292a5579a6ab3423ceca60d2ea41cd86481e7c0 \ No newline at end of file diff --git 
a/server/licenses/lucene-queries-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-queries-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..34bae244afee6 --- /dev/null +++ b/server/licenses/lucene-queries-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +94bd90bd24d88b887647443f54c187c4e7a2b3d5 \ No newline at end of file diff --git a/server/licenses/lucene-queryparser-7.3.0.jar.sha1 b/server/licenses/lucene-queryparser-7.3.0.jar.sha1 deleted file mode 100644 index 87a1d74498d82..0000000000000 --- a/server/licenses/lucene-queryparser-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -95b2563e5337377dde2eb987b3fce144be5e7a77 \ No newline at end of file diff --git a/server/licenses/lucene-queryparser-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-queryparser-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..4d749fa62a6b8 --- /dev/null +++ b/server/licenses/lucene-queryparser-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +491bd9444d2e04373bda8e171822fa3f59dccda7 \ No newline at end of file diff --git a/server/licenses/lucene-sandbox-7.3.0.jar.sha1 b/server/licenses/lucene-sandbox-7.3.0.jar.sha1 deleted file mode 100644 index 605263a2296ff..0000000000000 --- a/server/licenses/lucene-sandbox-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1efd2fa7cba1e359e3fbb8b4c11cab37024b2178 \ No newline at end of file diff --git a/server/licenses/lucene-sandbox-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-sandbox-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..84828d0b0b082 --- /dev/null +++ b/server/licenses/lucene-sandbox-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +c92404b33ef8f70d6f4b79906f67b7e806275ed5 \ No newline at end of file diff --git a/server/licenses/lucene-spatial-7.3.0.jar.sha1 b/server/licenses/lucene-spatial-7.3.0.jar.sha1 deleted file mode 100644 index 4fcd32b5d29bb..0000000000000 --- a/server/licenses/lucene-spatial-7.3.0.jar.sha1 +++ /dev/null 
@@ -1 +0,0 @@ -93512c2160bdc3e602141329e5945a91918b6752 \ No newline at end of file diff --git a/server/licenses/lucene-spatial-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-spatial-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..440fd9c0058b6 --- /dev/null +++ b/server/licenses/lucene-spatial-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +7494bd2bada5cb2f1c79d783d1843e9dae4c2749 \ No newline at end of file diff --git a/server/licenses/lucene-spatial-extras-7.3.0.jar.sha1 b/server/licenses/lucene-spatial-extras-7.3.0.jar.sha1 deleted file mode 100644 index 0f078420cdb19..0000000000000 --- a/server/licenses/lucene-spatial-extras-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -47090d8ddf99f6bbb64ee8ab7a76c3cd3165b88f \ No newline at end of file diff --git a/server/licenses/lucene-spatial-extras-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-spatial-extras-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..5a8f4a4cd4d28 --- /dev/null +++ b/server/licenses/lucene-spatial-extras-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +c872d48c0c5a2040fd753a87f376011683ea0ce3 \ No newline at end of file diff --git a/server/licenses/lucene-spatial3d-7.3.0.jar.sha1 b/server/licenses/lucene-spatial3d-7.3.0.jar.sha1 deleted file mode 100644 index 268ed39a78405..0000000000000 --- a/server/licenses/lucene-spatial3d-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -ed8f07d67445d5acde6597996461640b2d92fa08 \ No newline at end of file diff --git a/server/licenses/lucene-spatial3d-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-spatial3d-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..6a78d90a6118d --- /dev/null +++ b/server/licenses/lucene-spatial3d-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +89bebdfb38cfca13856c55cc4c2d796982da734d \ No newline at end of file diff --git a/server/licenses/lucene-suggest-7.3.0.jar.sha1 b/server/licenses/lucene-suggest-7.3.0.jar.sha1 
deleted file mode 100644 index 798238ce58bc1..0000000000000 --- a/server/licenses/lucene-suggest-7.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6034ccf6b27c659ab7a2678680bae8390fbfc40a \ No newline at end of file diff --git a/server/licenses/lucene-suggest-7.4.0-snapshot-330fd18f20.jar.sha1 b/server/licenses/lucene-suggest-7.4.0-snapshot-330fd18f20.jar.sha1 new file mode 100644 index 0000000000000..d8a58792524ae --- /dev/null +++ b/server/licenses/lucene-suggest-7.4.0-snapshot-330fd18f20.jar.sha1 @@ -0,0 +1 @@ +a898df610f79875e757a79219b723cac5f20d262 \ No newline at end of file diff --git a/server/src/main/java/org/elasticsearch/Version.java b/server/src/main/java/org/elasticsearch/Version.java index d8ef145a4a57f..91b953025812a 100644 --- a/server/src/main/java/org/elasticsearch/Version.java +++ b/server/src/main/java/org/elasticsearch/Version.java @@ -171,7 +171,7 @@ public class Version implements Comparable, ToXContentFragment { public static final Version V_6_3_0 = new Version(V_6_3_0_ID, org.apache.lucene.util.Version.LUCENE_7_3_0); public static final int V_7_0_0_alpha1_ID = 7000001; public static final Version V_7_0_0_alpha1 = - new Version(V_7_0_0_alpha1_ID, org.apache.lucene.util.Version.LUCENE_7_3_0); + new Version(V_7_0_0_alpha1_ID, org.apache.lucene.util.Version.LUCENE_7_4_0); public static final Version CURRENT = V_7_0_0_alpha1; static { From b591fc4dd3abd45d51c1b4c3ae8be3721594b689 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 8 May 2018 09:16:42 +0100 Subject: [PATCH 03/12] Add the ability to index two-term shingles for faster phrase queries Specifying `index_phrases: true` on a text field mapping will add a subsidiary [field]._index_phrase field, indexing two-term shingles from the parent field. The parent analysis chain is re-used, wrapped with a FixedShingleFilter. At query time, if a phrase match query is executed, the mapping will redirect it to run against the subsidiary field. 
This should trade faster phrase querying for a larger index and longer indexing times. --- .../test/search/200_index_phrase_search.yml | 67 ++++++++ .../index/mapper/MappedFieldType.java | 5 + .../index/mapper/TextFieldMapper.java | 161 +++++++++++++++++- .../index/query/MatchPhraseQueryBuilder.java | 8 +- .../index/search/MatchQuery.java | 10 +- .../index/mapper/TextFieldMapperTests.java | 56 ++++++ 6 files changed, 293 insertions(+), 14 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search.yml diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search.yml new file mode 100644 index 0000000000000..241fbc187dec6 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search.yml @@ -0,0 +1,67 @@ +--- +"search with indexed phrases": + - skip: + version: " - 6.99.99" + reason: index_phrase is only available as of 7.0.0 + - do: + indices.create: + index: test + body: + mappings: + test: + properties: + text: + type: text + index_phrases: true + + - do: + index: + index: test + type: test + id: 1 + body: { text: "peter piper picked a peck of pickled peppers" } + + - do: + indices.refresh: + index: [test] + + - do: + search: + index: test + body: + query: + match_phrase: + text: + query: "peter piper" + + - match: {hits.total: 1} + + - do: + search: + index: test + q: '"peter piper"~1' + df: text + + - match: {hits.total: 1} + + - do: + search: + index: test + body: + query: + match_phrase: + text: "peter piper picked" + + - match: {hits.total: 1} + + - do: + search: + index: test + body: + query: + match_phrase: + text: "piper" + + - match: {hits.total: 1} + + diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 
69189ab129762..443dea3763063 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -43,6 +43,7 @@ import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.search.DocValueFormat; import org.joda.time.DateTimeZone; @@ -360,6 +361,10 @@ public Query nullValueQuery() { public abstract Query existsQuery(QueryShardContext context); + public MatchQuery matchQuery(QueryShardContext context, String analyzer, int slop) { + throw new QueryShardException(context, "Can only use match queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]"); + } + /** * An enum used to describe the relation between the range of terms in a * shard when compared with a query range diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index e2f8eb4e64f63..5f46162336a04 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -19,12 +19,14 @@ package org.elasticsearch.index.mapper; +import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.AnalyzerWrapper; import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; +import org.apache.lucene.analysis.shingle.FixedShingleFilter; import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import 
org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; @@ -34,6 +36,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.logging.ESLoggerFactory; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.support.XContentMapValues; @@ -42,9 +45,10 @@ import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.search.MatchQuery; import java.io.IOException; -import java.util.Collections; +import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -55,9 +59,13 @@ /** A {@link FieldMapper} for full-text fields. */ public class TextFieldMapper extends FieldMapper { + private static final Logger logger = ESLoggerFactory.getLogger(TextFieldMapper.class); + public static final String CONTENT_TYPE = "text"; private static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1; + public static final String FAST_PHRASE_SUFFIX = "._index_phrase"; + public static class Defaults { public static final double FIELDDATA_MIN_FREQUENCY = 0; public static final double FIELDDATA_MAX_FREQUENCY = Integer.MAX_VALUE; @@ -82,6 +90,7 @@ public static class Builder extends FieldMapper.Builder node, ParserCo builder.indexPrefixes(minChars, maxChars); DocumentMapperParser.checkNoRemainingFields(propName, indexPrefix, parserContext.indexVersionCreated()); iterator.remove(); + } else if (propName.equals("index_phrases")) { + builder.indexPhrases(XContentMapValues.nodeBooleanValue(propNode, "index_phrases")); + iterator.remove(); } } return builder; } } + private static class PhraseWrappedAnalyzer extends AnalyzerWrapper { + + private final Analyzer delegate; + + 
PhraseWrappedAnalyzer(Analyzer delegate) { + super(delegate.getReuseStrategy()); + this.delegate = delegate; + } + + @Override + protected Analyzer getWrappedAnalyzer(String fieldName) { + return delegate; + } + + @Override + protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { + return new TokenStreamComponents(components.getTokenizer(), new FixedShingleFilter(components.getTokenStream(), 2)); + } + } + private static class PrefixWrappedAnalyzer extends AnalyzerWrapper { private final int minChars; @@ -243,6 +281,40 @@ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComp } } + private static final class PhraseFieldType extends StringFieldType { + + static PhraseFieldType newInstance(String name, NamedAnalyzer analyzer) { + PhraseFieldType pft = new PhraseFieldType(name); + pft.setAnalyzer(analyzer.name(), analyzer.analyzer()); + return pft; + } + + PhraseFieldType(String name) { + setTokenized(true); + setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + setName(name); + } + + void setAnalyzer(String name, Analyzer delegate) { + setIndexAnalyzer(new NamedAnalyzer(name, AnalyzerScope.INDEX, new PhraseWrappedAnalyzer(delegate))); + } + + @Override + public MappedFieldType clone() { + return new PhraseFieldType(name()); + } + + @Override + public String typeName() { + return "phrase"; + } + + @Override + public Query existsQuery(QueryShardContext context) { + throw new UnsupportedOperationException(); + } + } + private static final class PrefixFieldType extends StringFieldType { final int minChars; @@ -307,6 +379,23 @@ public Query existsQuery(QueryShardContext context) { } } + private static final class PhraseFieldMapper extends FieldMapper { + + PhraseFieldMapper(PhraseFieldType fieldType, Settings indexSettings) { + super(fieldType.name(), fieldType, fieldType, indexSettings, MultiFields.empty(), CopyTo.empty()); + } + + @Override + protected void parseCreateField(ParseContext 
context, List fields) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + protected String contentType() { + return "phrase"; + } + } + private static final class PrefixFieldMapper extends FieldMapper { protected PrefixFieldMapper(PrefixFieldType fieldType, Settings indexSettings) { @@ -340,6 +429,7 @@ public static final class TextFieldType extends StringFieldType { private double fielddataMaxFrequency; private int fielddataMinSegmentSize; private PrefixFieldType prefixFieldType; + private boolean indexPhrases = false; public TextFieldType() { setTokenized(true); @@ -420,6 +510,10 @@ void setPrefixFieldType(PrefixFieldType prefixFieldType) { this.prefixFieldType = prefixFieldType; } + void indexPhrases(boolean indexPhrases) { + this.indexPhrases = indexPhrases; + } + @Override public String typeName() { return CONTENT_TYPE; @@ -455,6 +549,19 @@ public Query nullValueQuery() { return termQuery(nullValue(), null); } + @Override + public MatchQuery matchQuery(QueryShardContext context, String analyzer, int slop) { + if (indexPhrases == false || slop != 0) { + MatchQuery mq = new MatchQuery(context); + if (analyzer != null) { + mq.setAnalyzer(analyzer); + } + mq.setPhraseSlop(slop); + return mq; + } + return new ShingledMatchQuery(context, analyzer); + } + @Override public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { if (fielddata == false) { @@ -466,11 +573,35 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { } } + private static class ShingledMatchQuery extends MatchQuery { + + ShingledMatchQuery(QueryShardContext context, String analyzer) { + super(context); + if (analyzer != null) { + this.setAnalyzer(analyzer); + } + } + + @Override + protected MatchQuery.MatchQueryBuilder newMatchQueryBuilder(Analyzer analyzer, MappedFieldType mapper) { + return new MatchQuery.MatchQueryBuilder(analyzer, mapper){ + @Override + protected Query analyzePhrase(String field, TokenStream 
stream, int slop) throws IOException { + assert slop == 0; + Query q = super.analyzePhrase(field + FAST_PHRASE_SUFFIX, new FixedShingleFilter(stream, 2), slop); + logger.info("Phrase query: " + q); + return q; + } + }; + } + } + private int positionIncrementGap; private PrefixFieldMapper prefixFieldMapper; + private PhraseFieldMapper phraseFieldMapper; protected TextFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, - int positionIncrementGap, PrefixFieldMapper prefixFieldMapper, + int positionIncrementGap, PrefixFieldMapper prefixFieldMapper, boolean indexPhrases, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) { super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo); assert fieldType.tokenized(); @@ -480,6 +611,9 @@ protected TextFieldMapper(String simpleName, MappedFieldType fieldType, MappedFi } this.positionIncrementGap = positionIncrementGap; this.prefixFieldMapper = prefixFieldMapper; + this.phraseFieldMapper = indexPhrases + ? 
new PhraseFieldMapper(PhraseFieldType.newInstance(simpleName + FAST_PHRASE_SUFFIX, fieldType().indexAnalyzer()), indexSettings) + : null; } @Override @@ -513,15 +647,25 @@ protected void parseCreateField(ParseContext context, List field if (prefixFieldMapper != null) { prefixFieldMapper.addField(value, fields); } + if (phraseFieldMapper != null) { + fields.add(new Field(phraseFieldMapper.fieldType.name(), value, phraseFieldMapper.fieldType)); + } } } @Override public Iterator iterator() { - if (prefixFieldMapper == null) { + List subIterators = new ArrayList<>(); + if (prefixFieldMapper != null) { + subIterators.add(prefixFieldMapper); + } + if (phraseFieldMapper != null) { + subIterators.add(phraseFieldMapper); + } + if (subIterators.size() == 0) { return super.iterator(); } - return Iterators.concat(super.iterator(), Collections.singleton(prefixFieldMapper).iterator()); + return Iterators.concat(super.iterator(), subIterators.iterator()); } @Override @@ -540,6 +684,10 @@ else if (this.prefixFieldMapper != null || mw.prefixFieldMapper != null) { throw new IllegalArgumentException("mapper [" + name() + "] has different index_prefix settings, current [" + this.prefixFieldMapper + "], merged [" + mw.prefixFieldMapper + "]"); } + else if (this.fieldType().indexPhrases != mw.fieldType().indexPhrases) { + throw new IllegalArgumentException("mapper [" + name() + "] has different index_phrase settings, current [" + + this.fieldType().indexPhrases + "], merged [" + mw.fieldType().indexPhrases + "]"); + } } @Override @@ -580,5 +728,6 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, if (fieldType().prefixFieldType != null) { fieldType().prefixFieldType.doXContent(builder); } + builder.field("index_phrases", fieldType().indexPhrases); } } diff --git a/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java index 
ef88db6c12ce0..142c3fed851fc 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java @@ -28,6 +28,7 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.index.search.MatchQuery.ZeroTermsQuery; @@ -172,11 +173,8 @@ protected Query doToQuery(QueryShardContext context) throws IOException { throw new QueryShardException(context, "[" + NAME + "] analyzer [" + analyzer + "] not found"); } - MatchQuery matchQuery = new MatchQuery(context); - if (analyzer != null) { - matchQuery.setAnalyzer(analyzer); - } - matchQuery.setPhraseSlop(slop); + MappedFieldType fieldType = context.fieldMapper(fieldName); + MatchQuery matchQuery = fieldType.matchQuery(context, analyzer, slop); matchQuery.setZeroTermsQuery(zeroTermsQuery); return matchQuery.parse(MatchQuery.Type.PHRASE, fieldName, value); diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 03b4715c4b178..354dcac257bff 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -269,7 +269,7 @@ public Query parse(Type type, String fieldName, Object value) throws IOException Analyzer analyzer = getAnalyzer(fieldType, type == Type.PHRASE); assert analyzer != null; - MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType); + MatchQueryBuilder builder = newMatchQueryBuilder(analyzer, fieldType); builder.setEnablePositionIncrements(this.enablePositionIncrements); if (hasPositions(fieldType)) { 
builder.setAutoGenerateMultiTermSynonymsPhraseQuery(this.autoGenerateSynonymsPhraseQuery); @@ -327,14 +327,18 @@ protected Query zeroTermsQuery() { } } - private class MatchQueryBuilder extends QueryBuilder { + protected MatchQueryBuilder newMatchQueryBuilder(Analyzer analyzer, MappedFieldType mapper) { + return new MatchQueryBuilder(analyzer, mapper); + } + + protected class MatchQueryBuilder extends QueryBuilder { private final MappedFieldType mapper; /** * Creates a new QueryBuilder using the given analyzer. */ - MatchQueryBuilder(Analyzer analyzer, MappedFieldType mapper) { + protected MatchQueryBuilder(Analyzer analyzer, MappedFieldType mapper) { super(analyzer); this.mapper = mapper; } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index bd1d25eac5705..f5adea2af11f7 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -19,6 +19,8 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; @@ -29,6 +31,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; @@ -47,7 +50,9 @@ import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.mapper.MapperService.MergeReason; import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; +import org.elasticsearch.index.query.MatchPhraseQueryBuilder; import 
org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; @@ -65,6 +70,7 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.core.Is.is; public class TextFieldMapperTests extends ESSingleNodeTestCase { @@ -670,6 +676,56 @@ public void testIndexPrefixIndexTypes() throws IOException { } } + public void testFastPhraseMapping() throws IOException { + + QueryShardContext queryShardContext = indexService.newQueryShardContext( + randomInt(20), null, () -> { + throw new UnsupportedOperationException(); + }, null); + + String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties").startObject("field") + .field("type", "text") + .field("analyzer", "english") + .field("index_phrases", true) + .endObject().endObject() + .endObject().endObject()); + + DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping)); + assertEquals(mapping, mapper.mappingSource().toString()); + + queryShardContext.getMapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); + + Query q = mapper.mappers().getMapper("field").fieldType() + .matchQuery(queryShardContext, null, 0).parse(MatchQuery.Type.PHRASE, "field", "two words"); + assertThat(q, is(new PhraseQuery("field._index_phrase", "two word"))); + + Query q2 = mapper.mappers().getMapper("field").fieldType() + .matchQuery(queryShardContext, null, 0).parse(MatchQuery.Type.PHRASE, "field", "three words here"); + assertThat(q2, is(new PhraseQuery("field._index_phrase", "three word", "word here"))); + + Query q3 = mapper.mappers().getMapper("field").fieldType() + .matchQuery(queryShardContext, null, 1).parse(MatchQuery.Type.PHRASE, 
"field", "two words"); + assertThat(q3, is(new PhraseQuery(1, "field", "two", "word"))); + + ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference + .bytes(XContentFactory.jsonBuilder() + .startObject() + .field("field", "Some English text that is going to be very useful") + .endObject()), + XContentType.JSON)); + + IndexableField[] fields = doc.rootDoc().getFields("field._index_phrase"); + assertEquals(1, fields.length); + + try (TokenStream ts = fields[0].tokenStream(queryShardContext.getMapperService().indexAnalyzer(), null)) { + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + assertTrue(ts.incrementToken()); + assertEquals("some english", termAtt.toString()); + } + } + public void testIndexPrefixMapping() throws IOException { QueryShardContext queryShardContext = indexService.newQueryShardContext( From aca2b7e6aa1b9431de6f6e68eda39a0d1e98f874 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 8 May 2018 09:37:30 +0100 Subject: [PATCH 04/12] docs --- docs/reference/mapping/types/text.asciidoc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/reference/mapping/types/text.asciidoc b/docs/reference/mapping/types/text.asciidoc index 069e50fc79ac7..06f391fdfea24 100644 --- a/docs/reference/mapping/types/text.asciidoc +++ b/docs/reference/mapping/types/text.asciidoc @@ -96,6 +96,12 @@ The following parameters are accepted by `text` fields: the expense of a larger index. Accepts an <> +<>:: + + If enabled, two-term word combinations ('shingles') are indexed into a separate + field. This allows phrase queries to run more efficiently, at the expense + of a larger index. Accepts `true` or `false` (default). + <>:: Whether field-length should be taken into account when scoring queries. 
From e0fe29d5f3fd12148579e69e21e5101e5b332ead Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 8 May 2018 13:15:12 +0100 Subject: [PATCH 05/12] Placate checkstyle --- .../java/org/elasticsearch/index/mapper/TextFieldMapper.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 5f46162336a04..b6ff35d010730 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -612,7 +612,8 @@ protected TextFieldMapper(String simpleName, MappedFieldType fieldType, MappedFi this.positionIncrementGap = positionIncrementGap; this.prefixFieldMapper = prefixFieldMapper; this.phraseFieldMapper = indexPhrases - ? new PhraseFieldMapper(PhraseFieldType.newInstance(simpleName + FAST_PHRASE_SUFFIX, fieldType().indexAnalyzer()), indexSettings) + ? new PhraseFieldMapper(PhraseFieldType.newInstance(simpleName + FAST_PHRASE_SUFFIX, + fieldType().indexAnalyzer()), indexSettings) : null; } From c156cbd5f3e4ea79f4f46b7a97f81335b9a22fe8 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 8 May 2018 13:18:20 +0100 Subject: [PATCH 06/12] changelog --- docs/CHANGELOG.asciidoc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 621ca5a6414d2..5ff659ada37be 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -150,6 +150,9 @@ option. ({pull}30140[#29658]) A new analysis plugin called `analysis_nori` that exposes the Lucene Korean analysis module. 
({pull}30397[#30397]) +Text fields now have an `index_phrases` parameter that allows faster phrase queries +at the expense of a larger index ({pull}30450[#30450]) + [float] === Enhancements From 66b1e482b68177d3873e1f3188ad03b28851c0d2 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 22 May 2018 09:12:17 +0100 Subject: [PATCH 07/12] MappedFieldType.matchQuery -> .analyzePhrase --- .../index/mapper/MappedFieldType.java | 5 +- .../index/mapper/TextFieldMapper.java | 53 ++++++++----------- .../index/query/MatchPhraseQueryBuilder.java | 5 +- .../index/search/MatchQuery.java | 12 ++--- .../index/mapper/TextFieldMapperTests.java | 9 ++-- 5 files changed, 34 insertions(+), 50 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index ac462d9c2a4b6..87da9c2fcb755 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; @@ -361,8 +362,8 @@ public Query nullValueQuery() { public abstract Query existsQuery(QueryShardContext context); - public MatchQuery matchQuery(QueryShardContext context, String analyzer, int slop) { - throw new QueryShardException(context, "Can only use match queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]"); + public Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { + throw new IllegalArgumentException("Can only use phrase queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } /** diff --git 
a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index b6ff35d010730..6accc5864c91d 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -26,6 +26,8 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; import org.apache.lucene.analysis.shingle.FixedShingleFilter; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; @@ -33,6 +35,7 @@ import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.NormsFieldExistsQuery; +import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.elasticsearch.common.collect.Iterators; @@ -550,16 +553,25 @@ public Query nullValueQuery() { } @Override - public MatchQuery matchQuery(QueryShardContext context, String analyzer, int slop) { - if (indexPhrases == false || slop != 0) { - MatchQuery mq = new MatchQuery(context); - if (analyzer != null) { - mq.setAnalyzer(analyzer); - } - mq.setPhraseSlop(slop); - return mq; + public Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { + if (indexPhrases && slop == 0) { + stream = new FixedShingleFilter(stream, 2); + field = field + FAST_PHRASE_SUFFIX; + } + PhraseQuery.Builder builder = new PhraseQuery.Builder(); + builder.setSlop(slop); + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncrAtt = 
stream.getAttribute(PositionIncrementAttribute.class); + int position = -1; + + stream.reset(); + while (stream.incrementToken()) { + position += posIncrAtt.getPositionIncrement(); + builder.add(new Term(field, termAtt.getBytesRef()), position); } - return new ShingledMatchQuery(context, analyzer); + + return builder.build(); } @Override @@ -573,29 +585,6 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { } } - private static class ShingledMatchQuery extends MatchQuery { - - ShingledMatchQuery(QueryShardContext context, String analyzer) { - super(context); - if (analyzer != null) { - this.setAnalyzer(analyzer); - } - } - - @Override - protected MatchQuery.MatchQueryBuilder newMatchQueryBuilder(Analyzer analyzer, MappedFieldType mapper) { - return new MatchQuery.MatchQueryBuilder(analyzer, mapper){ - @Override - protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { - assert slop == 0; - Query q = super.analyzePhrase(field + FAST_PHRASE_SUFFIX, new FixedShingleFilter(stream, 2), slop); - logger.info("Phrase query: " + q); - return q; - } - }; - } - } - private int positionIncrementGap; private PrefixFieldMapper prefixFieldMapper; private PhraseFieldMapper phraseFieldMapper; diff --git a/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java index 509601cff2198..53e5d9d0c9134 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java @@ -173,8 +173,9 @@ protected Query doToQuery(QueryShardContext context) throws IOException { throw new QueryShardException(context, "[" + NAME + "] analyzer [" + analyzer + "] not found"); } - MappedFieldType fieldType = context.fieldMapper(fieldName); - MatchQuery matchQuery = fieldType.matchQuery(context, analyzer, slop); + MatchQuery 
matchQuery = new MatchQuery(context); + matchQuery.setAnalyzer(analyzer); + matchQuery.setPhraseSlop(slop); matchQuery.setZeroTermsQuery(zeroTermsQuery); return matchQuery.parse(MatchQuery.Type.PHRASE, fieldName, value); diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 354dcac257bff..c122577dc04d2 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -269,7 +269,7 @@ public Query parse(Type type, String fieldName, Object value) throws IOException Analyzer analyzer = getAnalyzer(fieldType, type == Type.PHRASE); assert analyzer != null; - MatchQueryBuilder builder = newMatchQueryBuilder(analyzer, fieldType); + MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType); builder.setEnablePositionIncrements(this.enablePositionIncrements); if (hasPositions(fieldType)) { builder.setAutoGenerateMultiTermSynonymsPhraseQuery(this.autoGenerateSynonymsPhraseQuery); @@ -327,18 +327,14 @@ protected Query zeroTermsQuery() { } } - protected MatchQueryBuilder newMatchQueryBuilder(Analyzer analyzer, MappedFieldType mapper) { - return new MatchQueryBuilder(analyzer, mapper); - } - - protected class MatchQueryBuilder extends QueryBuilder { + private class MatchQueryBuilder extends QueryBuilder { private final MappedFieldType mapper; /** * Creates a new QueryBuilder using the given analyzer. 
*/ - protected MatchQueryBuilder(Analyzer analyzer, MappedFieldType mapper) { + MatchQueryBuilder(Analyzer analyzer, MappedFieldType mapper) { super(analyzer); this.mapper = mapper; } @@ -364,7 +360,7 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws throw exc; } } - Query query = super.analyzePhrase(field, stream, slop); + Query query = mapper.analyzePhrase(field, stream, slop); if (query instanceof PhraseQuery) { // synonyms that expand to multiple terms can return a phrase query. return blendPhraseQuery((PhraseQuery) query, mapper); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index f5adea2af11f7..26fd0a8318121 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -696,16 +696,13 @@ public void testFastPhraseMapping() throws IOException { queryShardContext.getMapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); - Query q = mapper.mappers().getMapper("field").fieldType() - .matchQuery(queryShardContext, null, 0).parse(MatchQuery.Type.PHRASE, "field", "two words"); + Query q = new MatchPhraseQueryBuilder("field", "two words").toQuery(queryShardContext); assertThat(q, is(new PhraseQuery("field._index_phrase", "two word"))); - Query q2 = mapper.mappers().getMapper("field").fieldType() - .matchQuery(queryShardContext, null, 0).parse(MatchQuery.Type.PHRASE, "field", "three words here"); + Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext); assertThat(q2, is(new PhraseQuery("field._index_phrase", "three word", "word here"))); - Query q3 = mapper.mappers().getMapper("field").fieldType() - .matchQuery(queryShardContext, null, 1).parse(MatchQuery.Type.PHRASE, "field", "two words"); + Query q3 = new 
MatchPhraseQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); assertThat(q3, is(new PhraseQuery(1, "field", "two", "word"))); ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference From 9eb8a6d3b9431ffe5c5a91101149987a9a5f3f7a Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 24 May 2018 11:46:31 +0100 Subject: [PATCH 08/12] iter --- docs/reference/mapping/types/text.asciidoc | 6 ++- .../index/mapper/TextFieldMapper.java | 51 ++++++++++++------- .../index/query/MatchPhraseQueryBuilder.java | 4 +- .../index/mapper/TextFieldMapperTests.java | 7 +++ .../index/mapper/TextFieldTypeTests.java | 7 +++ 5 files changed, 55 insertions(+), 20 deletions(-) diff --git a/docs/reference/mapping/types/text.asciidoc b/docs/reference/mapping/types/text.asciidoc index 06f391fdfea24..d79aece0083ea 100644 --- a/docs/reference/mapping/types/text.asciidoc +++ b/docs/reference/mapping/types/text.asciidoc @@ -99,8 +99,10 @@ The following parameters are accepted by `text` fields: <>:: If enabled, two-term word combinations ('shingles') are indexed into a separate - field. This allows phrase queries to run more efficiently, at the expense - of a larger index. Accepts `true` or `false` (default). + field. This allows exact phrase queries to run more efficiently, at the expense + of a larger index. Note that this works best when stopwords are not removed, + as phrases containing stopwords will not use the subsidiary field and will fall + back to a standard phrase query. Accepts `true` or `false` (default). 
<>:: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 6accc5864c91d..17198599574b5 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -22,6 +22,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.AnalyzerWrapper; +import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; @@ -186,7 +187,7 @@ public TextFieldMapper build(BuilderContext context) { } fieldType().indexPhrases(indexPhrases); return new TextFieldMapper( - name, fieldType, defaultFieldType, positionIncrementGap, prefixMapper, indexPhrases, + name, fieldType(), defaultFieldType, positionIncrementGap, prefixMapper, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo); } } @@ -286,16 +287,20 @@ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComp private static final class PhraseFieldType extends StringFieldType { - static PhraseFieldType newInstance(String name, NamedAnalyzer analyzer) { - PhraseFieldType pft = new PhraseFieldType(name); - pft.setAnalyzer(analyzer.name(), analyzer.analyzer()); - return pft; - } + final TextFieldType parent; - PhraseFieldType(String name) { + PhraseFieldType(TextFieldType parent) { setTokenized(true); setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); - setName(name); + if (parent.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { + setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + } + if (parent.storeTermVectorOffsets()) { + setStoreTermVectorOffsets(true); + } + setAnalyzer(parent.indexAnalyzer().name(), 
parent.indexAnalyzer().analyzer()); + setName(parent.name() + FAST_PHRASE_SUFFIX); + this.parent = parent; } void setAnalyzer(String name, Analyzer delegate) { @@ -304,7 +309,7 @@ void setAnalyzer(String name, Analyzer delegate) { @Override public MappedFieldType clone() { - return new PhraseFieldType(name()); + return new PhraseFieldType(parent); } @Override @@ -448,6 +453,7 @@ protected TextFieldType(TextFieldType ref) { this.fielddataMinFrequency = ref.fielddataMinFrequency; this.fielddataMaxFrequency = ref.fielddataMaxFrequency; this.fielddataMinSegmentSize = ref.fielddataMinSegmentSize; + this.indexPhrases = ref.indexPhrases; } public TextFieldType clone() { @@ -461,6 +467,7 @@ public boolean equals(Object o) { } TextFieldType that = (TextFieldType) o; return fielddata == that.fielddata + && indexPhrases == that.indexPhrases && fielddataMinFrequency == that.fielddataMinFrequency && fielddataMaxFrequency == that.fielddataMaxFrequency && fielddataMinSegmentSize == that.fielddataMinSegmentSize; @@ -468,7 +475,7 @@ public boolean equals(Object o) { @Override public int hashCode() { - return Objects.hash(super.hashCode(), fielddata, + return Objects.hash(super.hashCode(), fielddata, indexPhrases, fielddataMinFrequency, fielddataMaxFrequency, fielddataMinSegmentSize); } @@ -514,6 +521,7 @@ void setPrefixFieldType(PrefixFieldType prefixFieldType) { } void indexPhrases(boolean indexPhrases) { + checkIfFrozen(); this.indexPhrases = indexPhrases; } @@ -554,7 +562,8 @@ public Query nullValueQuery() { @Override public Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { - if (indexPhrases && slop == 0) { + + if (indexPhrases && slop == 0 && hasGaps(stream) == false) { stream = new FixedShingleFilter(stream, 2); field = field + FAST_PHRASE_SUFFIX; } @@ -574,6 +583,17 @@ public Query analyzePhrase(String field, TokenStream stream, int slop) throws IO return builder.build(); } + private static boolean hasGaps(TokenStream stream) throws 
IOException { + assert stream instanceof CachingTokenFilter; + PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class); + stream.reset(); + while (stream.incrementToken()) { + if (posIncAtt.getPositionIncrement() > 1) + return true; + } + return false; + } + @Override public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { if (fielddata == false) { @@ -589,8 +609,8 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { private PrefixFieldMapper prefixFieldMapper; private PhraseFieldMapper phraseFieldMapper; - protected TextFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, - int positionIncrementGap, PrefixFieldMapper prefixFieldMapper, boolean indexPhrases, + protected TextFieldMapper(String simpleName, TextFieldType fieldType, MappedFieldType defaultFieldType, + int positionIncrementGap, PrefixFieldMapper prefixFieldMapper, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) { super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo); assert fieldType.tokenized(); @@ -600,10 +620,7 @@ protected TextFieldMapper(String simpleName, MappedFieldType fieldType, MappedFi } this.positionIncrementGap = positionIncrementGap; this.prefixFieldMapper = prefixFieldMapper; - this.phraseFieldMapper = indexPhrases - ? new PhraseFieldMapper(PhraseFieldType.newInstance(simpleName + FAST_PHRASE_SUFFIX, - fieldType().indexAnalyzer()), indexSettings) - : null; + this.phraseFieldMapper = fieldType.indexPhrases ? 
new PhraseFieldMapper(new PhraseFieldType(fieldType), indexSettings) : null; } @Override diff --git a/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java index 53e5d9d0c9134..4639b8df8e539 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java @@ -174,7 +174,9 @@ protected Query doToQuery(QueryShardContext context) throws IOException { } MatchQuery matchQuery = new MatchQuery(context); - matchQuery.setAnalyzer(analyzer); + if (analyzer != null) { + matchQuery.setAnalyzer(analyzer); + } matchQuery.setPhraseSlop(slop); matchQuery.setZeroTermsQuery(zeroTermsQuery); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 26fd0a8318121..f0408da19214c 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -705,6 +705,13 @@ public void testFastPhraseMapping() throws IOException { Query q3 = new MatchPhraseQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); assertThat(q3, is(new PhraseQuery(1, "field", "two", "word"))); + Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext); + assertThat(q4, is(new TermQuery(new Term("field", "singleton")))); + + Query q5 = new MatchPhraseQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); + assertThat(q5, + is(new PhraseQuery.Builder().add(new Term("field", "sparkl")).add(new Term("field", "stopword"), 2).build())); + ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference .bytes(XContentFactory.jsonBuilder() .startObject() diff --git 
a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java index 895bb97e16665..2c5cb3d32dbc9 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java @@ -71,6 +71,13 @@ public void modify(MappedFieldType ft) { tft.setFielddataMinSegmentSize(1000); } }); + addModifier(new Modifier("index_phrases", true) { + @Override + public void modify(MappedFieldType ft) { + TextFieldMapper.TextFieldType tft = (TextFieldMapper.TextFieldType)ft; + tft.indexPhrases(true); + } + }); } public void testTermQuery() { From 0deebb65bebb72519a9a14a70b1c639c91c53943 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 24 May 2018 11:52:08 +0100 Subject: [PATCH 09/12] iter --- .../org/elasticsearch/index/analysis/IcuTokenizerFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java index 2d8226745f8b8..84c611c0f8132 100644 --- a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java +++ b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java @@ -80,7 +80,7 @@ private ICUTokenizerConfig getIcuConfig(Environment env, Settings settings) { if (tailored.isEmpty()) { return null; } else { - final RuleBasedBreakIterator breakers[] = new RuleBasedBreakIterator[UScript.CODE_LIMIT]; + final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT]; for (Map.Entry entry : tailored.entrySet()) { int code = entry.getKey(); String resourcePath = entry.getValue(); @@ -105,7 +105,7 @@ public RuleBasedBreakIterator getBreakIterator(int script) { } //parse a single RBBi rule file - private 
RuleBasedBreakIterator parseRules(String filename, Environment env) throws IOException { + private BreakIterator parseRules(String filename, Environment env) throws IOException { final Path path = env.configFile().resolve(filename); String rules = Files.readAllLines(path) From 6cfa4b1bb0c28b8b79fa0c4f729a7c0912c9ffdb Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 29 May 2018 12:19:35 +0100 Subject: [PATCH 10/12] iter --- .../index/mapper/MappedFieldType.java | 8 +- .../index/mapper/TextFieldMapper.java | 81 ++++++++++++++++--- .../index/search/MatchQuery.java | 32 ++++++-- .../index/mapper/TextFieldMapperTests.java | 28 ++++++- .../index/mapper/TextFieldTypeTests.java | 5 +- 5 files changed, 124 insertions(+), 30 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 87da9c2fcb755..be75126ec0f53 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -362,8 +362,12 @@ public Query nullValueQuery() { public abstract Query existsQuery(QueryShardContext context); - public Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { - throw new IllegalArgumentException("Can only use phrase queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]"); + public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); + } + + public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of 
type [" + typeName() + "]"); } /** diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 17198599574b5..a811c3ea1642d 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -34,6 +34,7 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.NormsFieldExistsQuery; import org.apache.lucene.search.PhraseQuery; @@ -49,7 +50,6 @@ import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; import org.elasticsearch.index.query.QueryShardContext; -import org.elasticsearch.index.search.MatchQuery; import java.io.IOException; import java.util.ArrayList; @@ -94,7 +94,6 @@ public static class Builder extends FieldMapper.Builder multiTerms = new ArrayList<>(); + stream.reset(); + while (stream.incrementToken()) { + int positionIncrement = posIncrAtt.getPositionIncrement(); + + if (positionIncrement > 0 && multiTerms.size() > 0) { + if (enablePositionIncrements) { + mpqb.add(multiTerms.toArray(new Term[0]), position); + } else { + mpqb.add(multiTerms.toArray(new Term[0])); + } + multiTerms.clear(); + } + position += positionIncrement; + multiTerms.add(new Term(field, termAtt.getBytesRef())); + } + + if (enablePositionIncrements) { + mpqb.add(multiTerms.toArray(new Term[0]), position); + } else { + mpqb.add(multiTerms.toArray(new Term[0])); + } + return mpqb.build(); + } + + private static CachingTokenFilter cache(TokenStream in) { + if (in instanceof CachingTokenFilter) { + return (CachingTokenFilter) in; + } + return new CachingTokenFilter(in); + } + + private static boolean 
hasGaps(CachingTokenFilter stream) throws IOException { PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class); stream.reset(); while (stream.incrementToken()) { - if (posIncAtt.getPositionIncrement() > 1) + if (posIncAtt.getPositionIncrement() > 1) { return true; + } } return false; } @@ -692,7 +745,7 @@ else if (this.prefixFieldMapper != null || mw.prefixFieldMapper != null) { + this.prefixFieldMapper + "], merged [" + mw.prefixFieldMapper + "]"); } else if (this.fieldType().indexPhrases != mw.fieldType().indexPhrases) { - throw new IllegalArgumentException("mapper [" + name() + "] has different index_phrase settings, current [" + throw new IllegalArgumentException("mapper [" + name() + "] has different index_phrases settings, current [" + this.fieldType().indexPhrases + "], merged [" + mw.fieldType().indexPhrases + "]"); } } @@ -735,6 +788,8 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, if (fieldType().prefixFieldType != null) { fieldType().prefixFieldType.doXContent(builder); } - builder.field("index_phrases", fieldType().indexPhrases); + if (fieldType().indexPhrases) { + builder.field("index_phrases", fieldType().indexPhrases); + } } } diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java index c122577dc04d2..7b6f58dbd6614 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -29,7 +29,6 @@ import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; -import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiTermQuery; @@ -351,16 +350,14 @@ protected Query newSynonymQuery(Term[] terms) 
{ @Override protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { - if (hasPositions(mapper) == false) { - IllegalStateException exc = - new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery"); + IllegalStateException e = checkForPositions(field); + if (e != null) { if (lenient) { - return newLenientFieldQuery(field, exc); - } else { - throw exc; + return newLenientFieldQuery(field, e); } + throw e; } - Query query = mapper.analyzePhrase(field, stream, slop); + Query query = mapper.phraseQuery(field, stream, slop, enablePositionIncrements); if (query instanceof PhraseQuery) { // synonyms that expand to multiple terms can return a phrase query. return blendPhraseQuery((PhraseQuery) query, mapper); @@ -368,6 +365,25 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws return query; } + @Override + protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { + IllegalStateException e = checkForPositions(field); + if (e != null) { + if (lenient) { + return newLenientFieldQuery(field, e); + } + throw e; + } + return mapper.multiPhraseQuery(field, stream, slop, enablePositionIncrements); + } + + private IllegalStateException checkForPositions(String field) { + if (hasPositions(mapper) == false) { + return new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery"); + } + return null; + } + /** * Checks if graph analysis should be enabled for the field depending * on the provided {@link Analyzer} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index f0408da19214c..604002a1ce60d 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -31,6 +31,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; @@ -41,6 +42,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.lucene.uid.Versions; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; @@ -79,7 +81,13 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase { @Before public void setup() { - indexService = createIndex("test"); + Settings settings = Settings.builder() + .put("index.analysis.filter.mySynonyms.type", "synonym") + .putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto")) + .put("index.analysis.analyzer.synonym.tokenizer", "standard") + .put("index.analysis.analyzer.synonym.filter", "mySynonyms") + .build(); + indexService = createIndex("test", settings); parser = indexService.mapperService().documentMapperParser(); } @@ -684,11 +692,18 @@ public void testFastPhraseMapping() throws IOException { }, null); String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") - .startObject("properties").startObject("field") + .startObject("properties") + .startObject("field") .field("type", "text") .field("analyzer", "english") .field("index_phrases", true) - .endObject().endObject() + .endObject() + .startObject("synfield") + .field("type", "text") + .field("analyzer", "synonym") + .field("index_phrases", true) + .endObject() + .endObject() 
.endObject().endObject()); DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping)); @@ -712,6 +727,13 @@ public void testFastPhraseMapping() throws IOException { assertThat(q5, is(new PhraseQuery.Builder().add(new Term("field", "sparkl")).add(new Term("field", "stopword"), 2).build())); + Query q6 = new MatchPhraseQueryBuilder("synfield", "motor car").toQuery(queryShardContext); + assertThat(q6, is(new MultiPhraseQuery.Builder() + .add(new Term[]{ + new Term("synfield._index_phrase", "motor car"), + new Term("synfield._index_phrase", "motor auto")}) + .build())); + ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference .bytes(XContentFactory.jsonBuilder() .startObject() diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java index 2c5cb3d32dbc9..fdd5104e5b618 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java @@ -22,7 +22,6 @@ import java.util.Arrays; import java.util.List; -import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.search.TermInSetQuery; @@ -31,8 +30,6 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.unit.Fuzziness; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.TextFieldMapper; import org.junit.Before; public class TextFieldTypeTests extends FieldTypeTestCase { @@ -75,7 +72,7 @@ public void modify(MappedFieldType ft) { @Override public void modify(MappedFieldType ft) { TextFieldMapper.TextFieldType tft = (TextFieldMapper.TextFieldType)ft; - tft.indexPhrases(true); + tft.setIndexPhrases(true); } }); } From 1d7852ec16f51502fdc7efa822b939edc6b30304 Mon Sep 17 
00:00:00 2001 From: Alan Woodward Date: Tue, 29 May 2018 16:52:31 +0100 Subject: [PATCH 11/12] Check for positions --- .../index/mapper/TextFieldMapper.java | 8 ++++++ .../index/mapper/TextFieldMapperTests.java | 28 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index a811c3ea1642d..0af1a68dd8126 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -184,6 +184,14 @@ public TextFieldMapper build(BuilderContext context) { prefixFieldType.setAnalyzer(fieldType.indexAnalyzer()); prefixMapper = new PrefixFieldMapper(prefixFieldType, context.indexSettings()); } + if (fieldType().indexPhrases) { + if (fieldType().isSearchable() == false) { + throw new IllegalArgumentException("Cannot set index_phrases on unindexed field [" + name() + "]"); + } + if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { + throw new IllegalArgumentException("Cannot set index_phrases on field [" + name() + "] if positions are not enabled"); + } + } return new TextFieldMapper( name, fieldType(), defaultFieldType, positionIncrementGap, prefixMapper, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 604002a1ce60d..077c68ccc8ed3 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -750,6 +750,34 @@ public void testFastPhraseMapping() throws IOException { assertTrue(ts.incrementToken()); assertEquals("some english", termAtt.toString()); } + + { + String 
badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties").startObject("field") + .field("type", "text") + .field("index", "false") + .field("index_phrases", true) + .endObject().endObject() + .endObject().endObject()); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> parser.parse("type", new CompressedXContent(badConfigMapping)) + ); + assertThat(e.getMessage(), containsString("Cannot set index_phrases on unindexed field [field]")); + } + + { + String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties").startObject("field") + .field("type", "text") + .field("index_options", "freqs") + .field("index_phrases", true) + .endObject().endObject() + .endObject().endObject()); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> parser.parse("type", new CompressedXContent(badConfigMapping)) + ); + assertThat(e.getMessage(), containsString("Cannot set index_phrases on field [field] if positions are not enabled")); + } } public void testIndexPrefixMapping() throws IOException { From 8dd5cd528cb581b407db333f54e1abb3b662ed06 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 30 May 2018 15:14:48 +0100 Subject: [PATCH 12/12] index_phrases isn't an updateable setting --- .../org/elasticsearch/index/mapper/TextFieldMapper.java | 9 +++++++++ .../elasticsearch/index/mapper/TextFieldTypeTests.java | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index f4807dde8a3dd..df8f4467109d4 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -688,6 +688,15 @@ public IndexFieldData.Builder 
fielddataBuilder(String fullyQualifiedIndexName) { } return new PagedBytesIndexFieldData.Builder(fielddataMinFrequency, fielddataMaxFrequency, fielddataMinSegmentSize); } + + @Override + public void checkCompatibility(MappedFieldType other, List conflicts) { + super.checkCompatibility(other, conflicts); + TextFieldType tft = (TextFieldType) other; + if (tft.indexPhrases != this.indexPhrases) { + conflicts.add("mapper [" + name() + "] has different [index_phrases] values"); + } + } } private int positionIncrementGap; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java index 6b775a68dbf04..a556af0471184 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java @@ -68,7 +68,7 @@ public void modify(MappedFieldType ft) { tft.setFielddataMinSegmentSize(1000); } }); - addModifier(new Modifier("index_phrases", true) { + addModifier(new Modifier("index_phrases", false) { @Override public void modify(MappedFieldType ft) { TextFieldMapper.TextFieldType tft = (TextFieldMapper.TextFieldType) ft;