Skip to content

Commit

Permalink
Add support for wildcard field type (#13461)
Browse files Browse the repository at this point in the history
This adds support for the "wildcard" field type that supports efficient
execution of wildcard, prefix, and regexp queries by matching first against
trigrams (or bigrams or individual characters), then post-filtering by
evaluating the original field value against the pattern.

---------

Signed-off-by: Michael Froh <froh@amazon.com>
(cherry picked from commit b71e547)
  • Loading branch information
msfroh committed Jun 11, 2024
1 parent 8cf895b commit 8067a31
Show file tree
Hide file tree
Showing 7 changed files with 1,601 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Remote Store] Upload translog checkpoint as object metadata to translog.tlog([#13637](https://github.com/opensearch-project/OpenSearch/pull/13637))
- [Remote Store] Add dynamic cluster settings to set timeout for segments upload to Remote Store ([#13679](https://github.com/opensearch-project/OpenSearch/pull/13679))
- Add getMetadataFields to MapperService ([#13819](https://github.com/opensearch-project/OpenSearch/pull/13819))
- Add "wildcard" field type that supports efficient wildcard, prefix, and regexp queries ([#13461](https://github.com/opensearch-project/OpenSearch/pull/13461))
- Allow setting query parameters on requests ([#13776](https://github.com/opensearch-project/OpenSearch/issues/13776))
- [Remote Store] Add support to disable flush based on translog reader count ([#14027](https://github.com/opensearch-project/OpenSearch/pull/14027))
- [Query Insights] Add exporter support for top n queries ([#12982](https://github.com/opensearch-project/OpenSearch/pull/12982))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
setup:
- skip:
version: " - 2.99.99"
reason: "Added in 2.15, but need to skip pre-3.0 before backport"

- do:
indices.create:
index: test
body:
mappings:
properties:
my_field:
type: wildcard
fields:
lower:
type: wildcard
normalizer: lowercase
doc_values:
type: wildcard
doc_values: true

- do:
index:
index: test
id: 1
body:
my_field: "org.opensearch.transport.NodeDisconnectedException: [node_s0][127.0.0.1:39953][disconnected] disconnected"
- do:
index:
index: test
id: 2
body:
my_field: "[2024-06-08T06:31:37,443][INFO ][o.o.c.c.Coordinator ] [node_s2] cluster-manager node [{node_s0}{Nj7FjR7hRP2lh_zur8KN_g}{OTGOoWmmSsWP_RQ3tIKJ9g}{127.0.0.1}{127.0.0.1:39953}{imr}{shard_indexing_pressure_enabled=true}] failed, restarting discovery"

- do:
index:
index: test
id: 3
body:
my_field: "[2024-06-08T06:31:37,451][INFO ][o.o.c.s.ClusterApplierService] [node_s2] cluster-manager node changed {previous [{node_s0}{Nj7FjR7hRP2lh_zur8KN_g}{OTGOoWmmSsWP_RQ3tIKJ9g}{127.0.0.1}{127.0.0.1:39953}{imr}{shard_indexing_pressure_enabled=true}], current []}, term: 1, version: 24, reason: becoming candidate: onLeaderFailure"
- do:
index:
index: test
id: 4
body:
my_field: "[2024-06-08T06:31:37,452][WARN ][o.o.c.NodeConnectionsService] [node_s1] failed to connect to {node_s0}{Nj7FjR7hRP2lh_zur8KN_g}{OTGOoWmmSsWP_RQ3tIKJ9g}{127.0.0.1}{127.0.0.1:39953}{imr}{shard_indexing_pressure_enabled=true} (tried [1] times)"
- do:
index:
index: test
id: 5
body:
my_field: "AbCd"
- do:
index:
index: test
id: 6
body:
other_field: "test"
- do:
indices.refresh: {}

---
"term query matches exact value":
- do:
search:
index: test
body:
query:
term:
my_field: "AbCd"
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "5" }

- do:
search:
index: test
body:
query:
term:
my_field.doc_values: "AbCd"
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "5" }

---
"term query matches lowercase-normalized value":
- do:
search:
index: test
body:
query:
term:
my_field.lower: "abcd"
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "5" }

- do:
search:
index: test
body:
query:
term:
my_field.lower: "ABCD"
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "5" }

- do:
search:
index: test
body:
query:
term:
my_field: "abcd"
- match: { hits.total.value: 0 }

---
"wildcard query matches":
- do:
search:
index: test
body:
query:
wildcard:
my_field:
value: "*Node*Exception*"
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "1" }

---
"wildcard query matches lowercase-normalized field":
- do:
search:
index: test
body:
query:
wildcard:
my_field.lower:
value: "*node*exception*"
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "1" }

- do:
search:
index: test
body:
query:
wildcard:
my_field.lower:
value: "*NODE*EXCEPTION*"
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "1" }

- do:
search:
index: test
body:
query:
wildcard:
my_field:
value: "*node*exception*"
- match: { hits.total.value: 0 }

---
"prefix query matches":
- do:
search:
index: test
body:
query:
prefix:
my_field:
value: "[2024-06-08T"
- match: { hits.total.value: 3 }

---
"regexp query matches":
- do:
search:
index: test
body:
query:
regexp:
my_field:
value: ".*06-08.*cluster-manager node.*"
- match: { hits.total.value: 2 }

---
"regexp query matches lowercase-normalized field":
- do:
search:
index: test
body:
query:
regexp:
my_field.lower:
value: ".*06-08.*Cluster-Manager Node.*"
- match: { hits.total.value: 2 }

- do:
search:
index: test
body:
query:
regexp:
my_field:
value: ".*06-08.*Cluster-Manager Node.*"
- match: { hits.total.value: 0 }

---
"wildcard match-all works":
- do:
search:
index: test
body:
query:
wildcard:
my_field:
value: "*"
- match: { hits.total.value: 5 }
---
"regexp match-all works":
- do:
search:
index: test
body:
query:
regexp:
my_field:
value: ".*"
- match: { hits.total.value: 5 }
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
}
}

private static String normalizeValue(NamedAnalyzer normalizer, String field, String value) throws IOException {
static String normalizeValue(NamedAnalyzer normalizer, String field, String value) throws IOException {
try (TokenStream ts = normalizer.tokenStream(field, value)) {
final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
Expand Down
Loading

0 comments on commit 8067a31

Please sign in to comment.