Skip to content

Commit

Permalink
Call setReferences() on custom referring tokenfilters in _analyze (#3…
Browse files Browse the repository at this point in the history
…2157)

When building custom token filters without an index in the _analyze endpoint,
we need to ensure that referring filters are correctly built by calling
their #setReferences() method.

Fixes #32154
  • Loading branch information
romseygeek authored Jul 18, 2018
1 parent 6de1f96 commit cfb3014
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -1557,3 +1557,18 @@
filter: [my_bengali_stem]
- length: { tokens: 1 }
- match: { tokens.0.token: কর }

---
# REST test: calls indices.analyze on the text "The quick fox" using the
# standard tokenizer and a multiplexer filter defined inline in the request,
# whose `filters` list names lowercase and uppercase and whose
# preserve_original is false.
# Asserts that 6 tokens come back and that the first two are "the" and "THE".
"multiplexer":
- do:
indices.analyze:
body:
text: "The quick fox"
tokenizer: "standard"
filter:
- type: multiplexer
filters: [ lowercase, uppercase ]
preserve_original: false
- length: { tokens: 6 }
- match: { tokens.0.token: the }
- match: { tokens.1.token: THE }
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.ReferringFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
Expand Down Expand Up @@ -574,6 +575,7 @@ private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest
Environment environment, Tuple<String, TokenizerFactory> tokenizerFactory,
List<CharFilterFactory> charFilterFactoryList, boolean normalizer) throws IOException {
List<TokenFilterFactory> tokenFilterFactoryList = new ArrayList<>();
List<ReferringFilterFactory> referringFilters = new ArrayList<>();
if (request.tokenFilters() != null && request.tokenFilters().size() > 0) {
List<AnalyzeRequest.NameOrDefinition> tokenFilters = request.tokenFilters();
for (AnalyzeRequest.NameOrDefinition tokenFilter : tokenFilters) {
Expand All @@ -594,7 +596,9 @@ private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest
tokenFilterFactory = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter", settings);
tokenFilterFactory = CustomAnalyzerProvider.checkAndApplySynonymFilter(tokenFilterFactory, tokenizerFactory.v1(), tokenizerFactory.v2(), tokenFilterFactoryList,
charFilterFactoryList, environment);

if (tokenFilterFactory instanceof ReferringFilterFactory) {
referringFilters.add((ReferringFilterFactory)tokenFilterFactory);
}

} else {
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory;
Expand Down Expand Up @@ -629,6 +633,26 @@ private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest
tokenFilterFactoryList.add(tokenFilterFactory);
}
}
if (referringFilters.isEmpty() == false) {
// The request included at least one custom referring tokenfilter that has not already been built by the
// analysis registry, so we need to set its references. Note that this will only apply pre-built
// tokenfilters
if (indexSettings == null) {
Settings settings = Settings.builder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
.build();
IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
indexSettings = new IndexSettings(metaData, Settings.EMPTY);
}
Map<String, TokenFilterFactory> prebuiltFilters = analysisRegistry.buildTokenFilterFactories(indexSettings);
for (ReferringFilterFactory rff : referringFilters) {
rff.setReferences(prebuiltFilters);
}

}
return tokenFilterFactoryList;
}

Expand Down

0 comments on commit cfb3014

Please sign in to comment.