-
Notifications
You must be signed in to change notification settings - Fork 25k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Index phrases #30450
Index phrases #30450
Changes from 12 commits
e39d396
e931107
3719e30
4cea300
efd612a
b591fc4
aca2b7e
910f0c1
e0fe29d
c156cbd
007ee3d
66b1e48
4d6cb66
9eb8a6d
69cf210
0deebb6
6cfa4b1
6208c31
1d7852e
289186e
518e280
8dd5cd5
dab97ad
b2e732a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -80,7 +80,7 @@ private ICUTokenizerConfig getIcuConfig(Environment env, Settings settings) { | |
if (tailored.isEmpty()) { | ||
return null; | ||
} else { | ||
final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT]; | ||
final RuleBasedBreakIterator breakers[] = new RuleBasedBreakIterator[UScript.CODE_LIMIT]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This change is already in master |
||
for (Map.Entry<Integer, String> entry : tailored.entrySet()) { | ||
int code = entry.getKey(); | ||
String resourcePath = entry.getValue(); | ||
|
@@ -105,7 +105,7 @@ public RuleBasedBreakIterator getBreakIterator(int script) { | |
} | ||
|
||
//parse a single RBBi rule file | ||
private BreakIterator parseRules(String filename, Environment env) throws IOException { | ||
private RuleBasedBreakIterator parseRules(String filename, Environment env) throws IOException { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here |
||
|
||
final Path path = env.configFile().resolve(filename); | ||
String rules = Files.readAllLines(path) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
--- | ||
"search with indexed phrases": | ||
- skip: | ||
version: " - 6.99.99" | ||
reason: index_phrase is only available as of 7.0.0 | ||
- do: | ||
indices.create: | ||
index: test | ||
body: | ||
mappings: | ||
test: | ||
properties: | ||
text: | ||
type: text | ||
index_phrases: true | ||
|
||
- do: | ||
index: | ||
index: test | ||
type: test | ||
id: 1 | ||
body: { text: "peter piper picked a peck of pickled peppers" } | ||
|
||
- do: | ||
indices.refresh: | ||
index: [test] | ||
|
||
- do: | ||
search: | ||
index: test | ||
body: | ||
query: | ||
match_phrase: | ||
text: | ||
query: "peter piper" | ||
|
||
- match: {hits.total: 1} | ||
|
||
- do: | ||
search: | ||
index: test | ||
q: '"peter piper"~1' | ||
df: text | ||
|
||
- match: {hits.total: 1} | ||
|
||
- do: | ||
search: | ||
index: test | ||
body: | ||
query: | ||
match_phrase: | ||
text: "peter piper picked" | ||
|
||
- match: {hits.total: 1} | ||
|
||
- do: | ||
search: | ||
index: test | ||
body: | ||
query: | ||
match_phrase: | ||
text: "piper" | ||
|
||
- match: {hits.total: 1} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
|
||
package org.elasticsearch.index.mapper; | ||
|
||
import org.apache.lucene.analysis.TokenStream; | ||
import org.apache.lucene.document.FieldType; | ||
import org.apache.lucene.index.IndexOptions; | ||
import org.apache.lucene.index.IndexReader; | ||
|
@@ -43,6 +44,7 @@ | |
import org.elasticsearch.index.query.QueryRewriteContext; | ||
import org.elasticsearch.index.query.QueryShardContext; | ||
import org.elasticsearch.index.query.QueryShardException; | ||
import org.elasticsearch.index.search.MatchQuery; | ||
import org.elasticsearch.index.similarity.SimilarityProvider; | ||
import org.elasticsearch.search.DocValueFormat; | ||
import org.joda.time.DateTimeZone; | ||
|
@@ -360,6 +362,10 @@ public Query nullValueQuery() { | |
|
||
public abstract Query existsQuery(QueryShardContext context); | ||
|
||
public Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe |
||
throw new IllegalArgumentException("Can only use phrase queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]"); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe only mention text fields in the error message? Even if we don't fail phrase queries on keywords, I'm not sure mentioning keyword fields here is very useful. |
||
} | ||
|
||
/** | ||
* An enum used to describe the relation between the range of terms in a | ||
* shard when compared with a query range | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we should document that this feature works better when stop words are not removed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1