Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] [Bug] fix case sensitivity for wildcard queries #5493

Merged
merged 1 commit into from
Dec 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Removed
### Fixed
- Fix 1.x compatibility bug with stored Tasks ([#5412](https://github.com/opensearch-project/OpenSearch/pull/5412))
- Fix case sensitivity for wildcard queries ([#5462](https://github.com/opensearch-project/OpenSearch/pull/5462))
### Security

[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.4...2.x
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,39 @@ public void testKeywordWithWhitespace() throws Exception {
assertHitCount(resp, 3L);
}

public void testRegexCaseInsensitivity() throws Exception {
createIndex("messages");
List<IndexRequestBuilder> indexRequests = new ArrayList<>();
indexRequests.add(client().prepareIndex("messages").setId("1").setSource("message", "message: this is a TLS handshake"));
indexRequests.add(client().prepareIndex("messages").setId("2").setSource("message", "message: this is a tcp handshake"));
indexRandom(true, false, indexRequests);

SearchResponse response = client().prepareSearch("messages").setQuery(queryStringQuery("/TLS/").defaultField("message")).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertHits(response.getHits(), "1");

response = client().prepareSearch("messages").setQuery(queryStringQuery("/tls/").defaultField("message")).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertHits(response.getHits(), "1");

response = client().prepareSearch("messages").setQuery(queryStringQuery("/TCP/").defaultField("message")).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertHits(response.getHits(), "2");

response = client().prepareSearch("messages").setQuery(queryStringQuery("/tcp/").defaultField("message")).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertHits(response.getHits(), "2");

response = client().prepareSearch("messages").setQuery(queryStringQuery("/HANDSHAKE/").defaultField("message")).get();
assertNoFailures(response);
assertHitCount(response, 2);
assertHits(response.getHits(), "1", "2");
}

public void testAllFields() throws Exception {
String indexBody = copyToStringFromClasspath("/org/opensearch/search/query/all-query-index.json");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,15 @@
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.regex.Pattern;

import static java.util.Collections.singletonMap;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.opensearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
Expand Down Expand Up @@ -2089,8 +2092,14 @@ public void testWildcardQueryNormalizationOnTextField() {
refresh();

{
// test default case insensitivity: false
WildcardQueryBuilder wildCardQuery = wildcardQuery("field1", "Bb*");
SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
assertHitCount(searchResponse, 0L);

// test case insensitivity set to true
wildCardQuery = wildcardQuery("field1", "Bb*").caseInsensitive(true);
searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
assertHitCount(searchResponse, 1L);

wildCardQuery = wildcardQuery("field1", "bb*");
Expand All @@ -2099,6 +2108,24 @@ public void testWildcardQueryNormalizationOnTextField() {
}
}

/** tests wildcard case sensitivity */
public void testWildcardCaseSensitivity() {
assertAcked(prepareCreate("test").setMapping("field", "type=text"));
client().prepareIndex("test").setId("1").setSource("field", "lowercase text").get();
refresh();

// test case sensitive
SearchResponse response = client().prepareSearch("test").setQuery(wildcardQuery("field", "Text").caseInsensitive(false)).get();
assertNoFailures(response);
assertHitCount(response, 0);

// test case insensitive
response = client().prepareSearch("test").setQuery(wildcardQuery("field", "Text").caseInsensitive(true)).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertHits(response.getHits(), "1");
}

/**
* Reserved characters should be excluded when the normalization is applied for keyword fields.
* See https://github.com/elastic/elasticsearch/issues/46300 for details.
Expand Down Expand Up @@ -2175,4 +2202,16 @@ public void testIssueFuzzyInsideSpanMulti() {
SearchResponse response = client().prepareSearch("test").setQuery(query).get();
assertHitCount(response, 1);
}

/**
* asserts the search response hits include the expected ids
*/
private void assertHits(SearchHits hits, String... ids) {
assertThat(hits.getTotalHits().value, equalTo((long) ids.length));
Set<String> hitIds = new HashSet<>();
for (SearchHit hit : hits.getHits()) {
hitIds.add(hit.getId());
}
assertThat(hitIds, containsInAnyOrder(ids));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.opensearch.common.Nullable;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.common.xcontent.XContentParser;
import org.opensearch.index.analysis.IndexAnalyzers;
Expand Down Expand Up @@ -368,6 +371,18 @@ protected BytesRef indexedValueForSearch(Object value) {
}
return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString());
}

@Override
public Query wildcardQuery(
String value,
@Nullable MultiTermQuery.RewriteMethod method,
boolean caseInsensitve,
QueryShardContext context
) {
// keyword field types are always normalized, so ignore case sensitivity and force normalize the wildcard
// query text
return super.wildcardQuery(value, method, caseInsensitve, true, context);
}
}

private final boolean indexed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ public Query prefixQuery(
) {
throw new QueryShardException(
context,
"Can only use prefix queries on keyword, text and wildcard fields - not on [" + name + "] which is of type [" + typeName() + "]"
"Can only use prefix queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]"
);
}

Expand All @@ -290,6 +290,7 @@ public final Query wildcardQuery(String value, @Nullable MultiTermQuery.RewriteM
return wildcardQuery(value, method, false, context);
}

/** optionally normalize the wildcard pattern based on the value of {@code caseInsensitive} */
public Query wildcardQuery(
String value,
@Nullable MultiTermQuery.RewriteMethod method,
Expand All @@ -298,11 +299,15 @@ public Query wildcardQuery(
) {
throw new QueryShardException(
context,
"Can only use wildcard queries on keyword, text and wildcard fields - not on ["
+ name
+ "] which is of type ["
+ typeName()
+ "]"
"Can only use wildcard queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]"
);
}

/** always normalizes the wildcard pattern to lowercase */
public Query normalizedWildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) {
throw new QueryShardException(
context,
"Can only use wildcard queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]"
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,34 @@ public static final String normalizeWildcardPattern(String fieldname, String val
return sb.toBytesRef().utf8ToString();
}

/** optionally normalize the wildcard pattern based on the value of {@code caseInsensitive} */
@Override
public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, QueryShardContext context) {
return wildcardQuery(value, method, caseInsensitive, false, context);
}

/** always normalizes the wildcard pattern to lowercase */
@Override
public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
return wildcardQuery(value, method, false, true, context);
}

/**
* return a wildcard query
*
* @param value the pattern
* @param method rewrite method
* @param caseInsensitive should ignore case; note, only used if there is no analyzer, else we use the analyzer rules
* @param normalizeIfAnalyzed force normalize casing if an analyzer is used
* @param context the query shard context
*/
public Query wildcardQuery(
String value,
MultiTermQuery.RewriteMethod method,
boolean caseInsensitive,
boolean normalizeIfAnalyzed,
QueryShardContext context
) {
failIfNotIndexed();
if (context.allowExpensiveQueries() == false) {
throw new OpenSearchException(
Expand All @@ -162,7 +188,7 @@ public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, bo
}

Term term;
if (getTextSearchInfo().getSearchAnalyzer() != null) {
if (getTextSearchInfo().getSearchAnalyzer() != null && normalizeIfAnalyzed) {
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
term = new Term(name(), value);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,8 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
if (getAllowLeadingWildcard() == false && (termStr.startsWith("*") || termStr.startsWith("?"))) {
throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
}
return currentFieldType.wildcardQuery(termStr, getMultiTermRewriteMethod(), context);
// query string query is always normalized
return currentFieldType.normalizedWildcardQuery(termStr, getMultiTermRewriteMethod(), context);
} catch (RuntimeException e) {
if (lenient) {
return newLenientFieldQuery(field, e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ public void testNumeric() throws Exception {
QueryShardContext context = createShardContext();
QueryShardException e = expectThrows(QueryShardException.class, () -> query.toQuery(context));
assertEquals(
"Can only use prefix queries on keyword, text and wildcard fields - not on [mapped_int] which is of type [integer]",
"Can only use prefix queries on keyword and text fields - not on [mapped_int] which is of type [integer]",
e.getMessage()
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,7 @@ public void testPrefixNumeric() throws Exception {
QueryShardContext context = createShardContext();
QueryShardException e = expectThrows(QueryShardException.class, () -> query.toQuery(context));
assertEquals(
"Can only use prefix queries on keyword, text and wildcard fields - not on [mapped_int] which is of type [integer]",
"Can only use prefix queries on keyword and text fields - not on [mapped_int] which is of type [integer]",
e.getMessage()
);
query.lenient(true);
Expand Down