Skip to content

Commit

Permalink
ESQL: fix non-null value being returned for unsupported data types in…
Browse files Browse the repository at this point in the history
… ValueSources (#100656)

* Refine (and fix) all cases where an unsupported data type field's values
are returned from SourceValues.
* Improve unsupported data types handling in TopN
  • Loading branch information
astefan authored Oct 11, 2023
1 parent 616960c commit 29e3d28
Show file tree
Hide file tree
Showing 9 changed files with 175 additions and 14 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/100656.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 100656
summary: "ESQL: fix non-null value being returned for unsupported data types in `ValueSources`"
area: ES|QL
type: bug
issues:
- 100048
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,18 @@ public static List<ValueSourceInfo> sources(
sources.add(new ValueSourceInfo(new NullValueSourceType(), new NullValueSource(), elementType, ctx.getIndexReader()));
continue; // the field does not exist in this context
}
if (asUnsupportedSource) {
sources.add(
new ValueSourceInfo(
new UnsupportedValueSourceType(fieldType.typeName()),
new UnsupportedValueSource(null),
elementType,
ctx.getIndexReader()
)
);
HeaderWarning.addWarning("Field [{}] cannot be retrieved, it is unsupported or not indexed; returning null", fieldName);
continue;
}

if (fieldType.hasDocValues() == false) {
// MatchOnlyTextFieldMapper class lives in the mapper-extras module. We use string equality
Expand Down Expand Up @@ -99,19 +111,7 @@ public static List<ValueSourceInfo> sources(
var fieldContext = new FieldContext(fieldName, fieldData, fieldType);
var vsType = fieldData.getValuesSourceType();
var vs = vsType.getField(fieldContext, null);

if (asUnsupportedSource) {
sources.add(
new ValueSourceInfo(
new UnsupportedValueSourceType(fieldType.typeName()),
new UnsupportedValueSource(vs),
elementType,
ctx.getIndexReader()
)
);
} else {
sources.add(new ValueSourceInfo(vsType, vs, elementType, ctx.getIndexReader()));
}
sources.add(new ValueSourceInfo(vsType, vs, elementType, ctx.getIndexReader()));
}

return sources;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public BytesRef decodeBytesRef(BytesRef bytes, BytesRef scratch) {

@Override
public String toString() {
return "DefaultUnsortable";
return "DefaultSortable";
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ public interface TopNEncoder {
*/
VersionTopNEncoder VERSION = new VersionTopNEncoder();

/**
* Placeholder encoder for unsupported data types.
*/
UnsupportedTypesTopNEncoder UNSUPPORTED = new UnsupportedTypesTopNEncoder();

void encodeLong(long value, BreakingBytesRefBuilder bytesRefBuilder);

long decodeLong(BytesRef bytes);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.compute.operator.topn;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;

/**
* TopNEncoder for data types that are unsupported. This is just a placeholder class, reaching the encode/decode methods here is a bug.
*
* While this class is needed to build the TopNOperator value and key extractors infrastructure, encoding/decoding is needed
* when actually sorting on a field (which shouldn't be possible for unsupported data types) using key extractors, or when encoding/decoding
* unsupported data types fields values (which should always be "null" by convention) using value extractors.
*/
class UnsupportedTypesTopNEncoder extends SortableTopNEncoder {
@Override
public int encodeBytesRef(BytesRef value, BreakingBytesRefBuilder bytesRefBuilder) {
throw new UnsupportedOperationException("Encountered a bug; trying to encode an unsupported data type value for TopN");
}

@Override
public BytesRef decodeBytesRef(BytesRef bytes, BytesRef scratch) {
throw new UnsupportedOperationException("Encountered a bug; trying to decode an unsupported data type value for TopN");
}

@Override
public String toString() {
return "UnsupportedTypesTopNEncoder";
}

@Override
public TopNEncoder toSortable() {
return this;
}

@Override
public TopNEncoder toUnsortable() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ setup:
---
load everything:
- do:
allowed_warnings_regex:
- "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
esql.query:
body:
query: 'from test'
Expand Down Expand Up @@ -156,6 +158,8 @@ filter on counter:
---
from doc with aggregate_metric_double:
- do:
allowed_warnings_regex:
- "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
esql.query:
body:
query: 'from test2'
Expand Down Expand Up @@ -183,6 +187,8 @@ stats on aggregate_metric_double:
---
from index pattern unsupported counter:
- do:
allowed_warnings_regex:
- "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
esql.query:
body:
query: 'FROM test*'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,96 @@ unsupported:
- match: { columns.0.name: shape }
- match: { columns.0.type: unsupported }
- length: { values: 0 }

---
unsupported with sort:
- do:
allowed_warnings_regex:
- "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
esql.query:
body:
query: 'from test | sort some_doc.bar'

- match: { columns.0.name: aggregate_metric_double }
- match: { columns.0.type: unsupported }
- match: { columns.1.name: binary }
- match: { columns.1.type: unsupported }
- match: { columns.2.name: completion }
- match: { columns.2.type: unsupported }
- match: { columns.3.name: date_nanos }
- match: { columns.3.type: unsupported }
- match: { columns.4.name: date_range }
- match: { columns.4.type: unsupported }
- match: { columns.5.name: dense_vector }
- match: { columns.5.type: unsupported }
- match: { columns.6.name: double_range }
- match: { columns.6.type: unsupported }
- match: { columns.7.name: float_range }
- match: { columns.7.type: unsupported }
- match: { columns.8.name: geo_point }
- match: { columns.8.type: unsupported }
- match: { columns.9.name: geo_point_alias }
- match: { columns.9.type: unsupported }
- match: { columns.10.name: histogram }
- match: { columns.10.type: unsupported }
- match: { columns.11.name: integer_range }
- match: { columns.11.type: unsupported }
- match: { columns.12.name: ip_range }
- match: { columns.12.type: unsupported }
- match: { columns.13.name: long_range }
- match: { columns.13.type: unsupported }
- match: { columns.14.name: match_only_text }
- match: { columns.14.type: text }
- match: { columns.15.name: name }
- match: { columns.15.type: keyword }
- match: { columns.16.name: rank_feature }
- match: { columns.16.type: unsupported }
- match: { columns.17.name: rank_features }
- match: { columns.17.type: unsupported }
- match: { columns.18.name: search_as_you_type }
- match: { columns.18.type: unsupported }
- match: { columns.19.name: search_as_you_type._2gram }
- match: { columns.19.type: unsupported }
- match: { columns.20.name: search_as_you_type._3gram }
- match: { columns.20.type: unsupported }
- match: { columns.21.name: search_as_you_type._index_prefix }
- match: { columns.21.type: unsupported }
- match: { columns.22.name: shape }
- match: { columns.22.type: unsupported }
- match: { columns.23.name: some_doc.bar }
- match: { columns.23.type: long }
- match: { columns.24.name: some_doc.foo }
- match: { columns.24.type: keyword }
- match: { columns.25.name: text }
- match: { columns.25.type: text }
- match: { columns.26.name: token_count }
- match: { columns.26.type: integer }

- length: { values: 1 }
- match: { values.0.0: null }
- match: { values.0.1: null }
- match: { values.0.2: null }
- match: { values.0.3: null }
- match: { values.0.4: null }
- match: { values.0.5: null }
- match: { values.0.6: null }
- match: { values.0.7: null }
- match: { values.0.8: null }
- match: { values.0.9: null }
- match: { values.0.10: null }
- match: { values.0.11: null }
- match: { values.0.12: null }
- match: { values.0.13: null }
- match: { values.0.14: "foo bar baz" }
- match: { values.0.15: Alice }
- match: { values.0.16: null }
- match: { values.0.17: null }
- match: { values.0.18: null }
- match: { values.0.19: null }
- match: { values.0.20: null }
- match: { values.0.21: null }
- match: { values.0.22: null }
- match: { values.0.23: 12 }
- match: { values.0.24: xy }
- match: { values.0.25: "foo bar" }
- match: { values.0.26: 3 }
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ disjoint_mappings:

---
same_name_different_type:
- skip:
features: allowed_warnings_regex
- do:
indices.create:
index: test1
Expand Down Expand Up @@ -307,6 +309,8 @@ same_name_different_type:
- { "message": 2 }

- do:
allowed_warnings_regex:
- "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
esql.query:
body:
query: 'from test1,test2 '
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,8 @@ private PhysicalOperation planTopN(TopNExec topNExec, LocalExecutionPlannerConte
case "version" -> TopNEncoder.VERSION;
case "boolean", "null", "byte", "short", "integer", "long", "double", "float", "half_float", "datetime", "date_period",
"time_duration", "object", "nested", "scaled_float", "unsigned_long", "_doc" -> TopNEncoder.DEFAULT_SORTABLE;
// unsupported fields are encoded as BytesRef, we'll use the same encoder; all values should be null at this point
case "unsupported" -> TopNEncoder.UNSUPPORTED;
default -> throw new EsqlIllegalArgumentException("No TopN sorting encoder for type " + inverse.get(channel).type());
};
}
Expand Down

0 comments on commit 29e3d28

Please sign in to comment.