Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set analyzer version in PreBuiltAnalyzerProviderFactory #31202

Merged
merged 1 commit into from
Jun 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -247,108 +247,31 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
@Override
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE, version -> {
Analyzer a = new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET);
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version -> {
Analyzer a = new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true,
CharArraySet.EMPTY_SET);
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("snowball", CachingStrategy.LUCENE, version -> {
Analyzer a = new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("arabic", CachingStrategy.LUCENE, version -> {
Analyzer a = new ArabicAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("armenian", CachingStrategy.LUCENE, version -> {
Analyzer a = new ArmenianAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("basque", CachingStrategy.LUCENE, version -> {
Analyzer a = new BasqueAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("bengali", CachingStrategy.LUCENE, version -> {
Analyzer a = new BengaliAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("brazilian", CachingStrategy.LUCENE, version -> {
Analyzer a = new BrazilianAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("bulgarian", CachingStrategy.LUCENE, version -> {
Analyzer a = new BulgarianAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("catalan", CachingStrategy.LUCENE, version -> {
Analyzer a = new CatalanAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("chinese", CachingStrategy.LUCENE, version -> {
// only for old indices, best effort
Analyzer a = new StandardAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("cjk", CachingStrategy.LUCENE, version -> {
Analyzer a = new CJKAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("czech", CachingStrategy.LUCENE, version -> {
Analyzer a = new CzechAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("danish", CachingStrategy.LUCENE, version -> {
Analyzer a = new DanishAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("dutch", CachingStrategy.LUCENE, version -> {
Analyzer a = new DutchAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("english", CachingStrategy.LUCENE, version -> {
Analyzer a = new EnglishAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("finnish", CachingStrategy.LUCENE, version -> {
Analyzer a = new FinnishAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("french", CachingStrategy.LUCENE, version -> {
Analyzer a = new FrenchAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("galician", CachingStrategy.LUCENE, version -> {
Analyzer a = new GalicianAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("german", CachingStrategy.LUCENE, version -> {
Analyzer a = new GermanAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE,
() -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH,
() -> new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true,
CharArraySet.EMPTY_SET)));
analyzers.add(new PreBuiltAnalyzerProviderFactory("snowball", CachingStrategy.LUCENE,
() -> new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
analyzers.add(new PreBuiltAnalyzerProviderFactory("arabic", CachingStrategy.LUCENE, ArabicAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("armenian", CachingStrategy.LUCENE, ArmenianAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("basque", CachingStrategy.LUCENE, BasqueAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("bengali", CachingStrategy.LUCENE, BengaliAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("brazilian", CachingStrategy.LUCENE, BrazilianAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("bulgarian", CachingStrategy.LUCENE, BulgarianAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("catalan", CachingStrategy.LUCENE, CatalanAnalyzer::new));
// chinese analyzer: only for old indices, best effort
analyzers.add(new PreBuiltAnalyzerProviderFactory("chinese", CachingStrategy.LUCENE, StandardAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("cjk", CachingStrategy.LUCENE, CJKAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("czech", CachingStrategy.LUCENE, CzechAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("danish", CachingStrategy.LUCENE, DanishAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("dutch", CachingStrategy.LUCENE, DutchAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("english", CachingStrategy.LUCENE, EnglishAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("finnish", CachingStrategy.LUCENE, FinnishAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("french", CachingStrategy.LUCENE, FrenchAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("galician", CachingStrategy.LUCENE, GalicianAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("german", CachingStrategy.LUCENE, GermanAnalyzer::new));
return analyzers;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.util.Collection;
import java.util.List;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;

public class PreBuiltAnalyzerProviderFactory extends PreConfiguredAnalysisComponent<AnalyzerProvider<?>> implements Closeable {
Expand All @@ -46,13 +47,17 @@ public class PreBuiltAnalyzerProviderFactory extends PreConfiguredAnalysisCompon
PreBuiltAnalyzerProviderFactory(String name, PreBuiltAnalyzers preBuiltAnalyzer) {
super(name, new PreBuiltAnalyzersDelegateCache(name, preBuiltAnalyzer));
this.create = preBuiltAnalyzer::getAnalyzer;
current = new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, preBuiltAnalyzer.getAnalyzer(Version.CURRENT));
Analyzer analyzer = preBuiltAnalyzer.getAnalyzer(Version.CURRENT);
analyzer.setVersion(Version.CURRENT.luceneVersion);
current = new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, analyzer);
}

public PreBuiltAnalyzerProviderFactory(String name, PreBuiltCacheFactory.CachingStrategy cache, Function<Version, Analyzer> create) {
public PreBuiltAnalyzerProviderFactory(String name, PreBuiltCacheFactory.CachingStrategy cache, Supplier<Analyzer> create) {
super(name, cache);
this.create = create;
this.current = new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, create.apply(Version.CURRENT));
this.create = version -> create.get();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can change this to a Supplier<Analyzer> now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is still needed at line 49, otherwise we can't delegate the request version all the down to PreBuiltAnalyzers#getAnalyzer(...). Once all analyzers have been ported then I will change the type of the create field to Supplier<Analyzer>.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah! I get it now. LGTM

Analyzer analyzer = create.get();
analyzer.setVersion(Version.CURRENT.luceneVersion);
this.current = new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, analyzer);
}

@Override
Expand All @@ -71,7 +76,9 @@ public AnalyzerProvider<?> get(IndexSettings indexSettings,
@Override
protected AnalyzerProvider<?> create(Version version) {
assert Version.CURRENT.equals(version) == false;
return new PreBuiltAnalyzerProvider(getName(), AnalyzerScope.INDICES, create.apply(version));
Analyzer analyzer = create.apply(version);
analyzer.setVersion(version.luceneVersion);
return new PreBuiltAnalyzerProvider(getName(), AnalyzerScope.INDICES, analyzer);
}

@Override
Expand Down