
Commit a1f6389
Merge branch 'main' into remove-TermInSet-getTermData-usage
cbuescher committed Sep 19, 2024
2 parents 7ef6dc8 + 2d47cd7 commit a1f6389
Showing 12 changed files with 338 additions and 63 deletions.
2 changes: 1 addition & 1 deletion .buildkite/pipelines/periodic.yml
@@ -584,7 +584,7 @@ steps:
machineType: n2-standard-8
buildDirectory: /dev/shm/bk
diskSizeGb: 250
if: build.branch == "main" || build.branch == "7.17"
if: build.branch == "main" || build.branch == "8.x" || build.branch == "7.17"
- label: check-branch-consistency
command: .ci/scripts/run-gradle.sh branchConsistency
timeout_in_minutes: 15
6 changes: 6 additions & 0 deletions docs/changelog/113027.yaml
@@ -0,0 +1,6 @@
pr: 113027
summary: Retrieve the source for objects and arrays in a separate parsing phase
area: Mapping
type: bug
issues:
- 112374

@@ -11,10 +11,10 @@

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.stream.Collectors;

@@ -32,7 +32,7 @@ class SourceTransforms {
* @return flattened map
*/
public static Map<String, List<Object>> normalize(Map<String, Object> map) {
var flattened = new HashMap<String, List<Object>>();
var flattened = new TreeMap<String, List<Object>>();

descend(null, map, flattened);

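The change above swaps HashMap for TreeMap so the flattened map iterates its keys in natural (sorted) order, making the normalized form of a source document deterministic; HashMap iteration order depends on hashing and can differ between runs. A minimal stand-alone sketch of the difference (the demo class and sample keys are illustrative, not part of the repository):

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class TreeMapOrderingDemo {
    public static void main(String[] args) {
        // TreeMap keeps keys sorted, so a flattened source map prints
        // (and compares) in a stable order across runs.
        Map<String, List<Object>> flattened = new TreeMap<>();
        flattened.put("path.to.b", List.of(2));
        flattened.put("path.to.a", List.of(1));
        System.out.println(flattened.keySet()); // [path.to.a, path.to.b]

        // The same entries in a HashMap have no guaranteed order.
        Map<String, List<Object>> unordered = new HashMap<>(flattened);
        System.out.println(unordered.keySet()); // order unspecified
    }
}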
12 changes: 3 additions & 9 deletions muted-tests.yml
@@ -115,9 +115,6 @@ tests:
- class: org.elasticsearch.search.retriever.RankDocRetrieverBuilderIT
method: testRankDocsRetrieverWithCollapse
issue: https://github.com/elastic/elasticsearch/issues/112254
- class: org.elasticsearch.datastreams.logsdb.qa.StandardVersusLogsIndexModeRandomDataChallengeRestIT
method: testMatchAllQuery
issue: https://github.com/elastic/elasticsearch/issues/112374
- class: org.elasticsearch.smoketest.DocsClientYamlTestSuiteIT
method: test {yaml=reference/rest-api/watcher/put-watch/line_120}
issue: https://github.com/elastic/elasticsearch/issues/99517
@@ -201,12 +198,6 @@ tests:
- class: org.elasticsearch.xpack.ml.integration.MlJobIT
method: testDeleteJobAfterMissingAliases
issue: https://github.com/elastic/elasticsearch/issues/112823
- class: org.elasticsearch.datastreams.logsdb.qa.StandardVersusLogsIndexModeRandomDataChallengeRestIT
method: testDateHistogramAggregation
issue: https://github.com/elastic/elasticsearch/issues/112919
- class: org.elasticsearch.datastreams.logsdb.qa.StandardVersusLogsIndexModeRandomDataChallengeRestIT
method: testTermsQuery
issue: https://github.com/elastic/elasticsearch/issues/112462
- class: org.elasticsearch.repositories.blobstore.testkit.analyze.HdfsRepositoryAnalysisRestIT
issue: https://github.com/elastic/elasticsearch/issues/112889
- class: org.elasticsearch.xpack.ml.integration.MlJobIT
@@ -238,6 +229,9 @@ tests:
- class: org.elasticsearch.packaging.test.WindowsServiceTests
method: test30StartStop
issue: https://github.com/elastic/elasticsearch/issues/113160
- class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT
method: test {p0=indices.create/21_synthetic_source_stored/index param - nested array within array}
issue: https://github.com/elastic/elasticsearch/issues/113173

# Examples:
#

@@ -553,6 +553,7 @@ object array in object with dynamic override:
body:
- '{ "create": { } }'
- '{ "path_no": [ { "some_int": 10 }, {"name": "foo"} ], "path_runtime": [ { "some_int": 20 }, {"name": "bar"} ], "name": "baz" }'
- match: { errors: false }

- do:
search:

@@ -301,6 +301,52 @@ object param - nested object with stored array:
- match: { hits.hits.1._source.nested_array_stored.1.b.1.c: 200 }


---
index param - nested array within array:
- requires:
cluster_features: ["mapper.synthetic_source_keep"]
reason: requires tracking ignored source

- do:
indices.create:
index: test
body:
mappings:
_source:
mode: synthetic
properties:
name:
type: keyword
path:
properties:
to:
properties:
some:
store_array_source: true
properties:
id:
type: integer

- do:
bulk:
index: test
refresh: true
body:
- '{ "create": { } }'
- '{ "name": "A", "path": [ { "to": [ { "some" : [ { "id": 10 }, { "id": [1, 3, 2] } ] }, { "some": { "id": 100 } } ] }, { "to": { "some": { "id": [1000, 2000] } } } ] }'
- match: { errors: false }

- do:
search:
index: test
sort: name
- match: { hits.hits.0._source.name: A }
- match: { hits.hits.0._source.path.to.some.0.id: 10 }
- match: { hits.hits.0._source.path.to.some.1.id: [ 1, 3, 2] }
- match: { hits.hits.0._source.path.to.some.2.id: 100 }
- match: { hits.hits.0._source.path.to.some.3.id: [ 1000, 2000 ] }
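The assertions above capture how the doubly nested arrays come back from synthetic source: the four `some` objects across both nesting levels surface as one flat `path.to.some` array, in document order. A stand-alone sketch of that flattening in plain Java (illustrative only, not the Elasticsearch implementation):

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class FlattenDemo {
    // Collect every value found under the given key, walking nested
    // lists and maps in document order - mirroring how the test's
    // doubly nested "some" objects surface as one flat array.
    static void collect(Object node, String key, List<Object> out) {
        if (node instanceof Map<?, ?> map) {
            for (var entry : map.entrySet()) {
                if (entry.getKey().equals(key)) {
                    if (entry.getValue() instanceof List<?> list) {
                        out.addAll(list);
                    } else {
                        out.add(entry.getValue());
                    }
                } else {
                    collect(entry.getValue(), key, out);
                }
            }
        } else if (node instanceof List<?> list) {
            for (Object item : list) {
                collect(item, key, out);
            }
        }
    }

    public static void main(String[] args) {
        // The "path" value from the test document above.
        Object path = List.of(
            Map.of("to", List.of(
                Map.of("some", List.of(Map.of("id", 10), Map.of("id", List.of(1, 3, 2)))),
                Map.of("some", Map.of("id", 100))
            )),
            Map.of("to", Map.of("some", Map.of("id", List.of(1000, 2000))))
        );
        List<Object> some = new ArrayList<>();
        collect(path, "some", some);
        // Four entries, matching hits.hits.0._source.path.to.some.{0..3}
        System.out.println(some.size()); // 4
    }
}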


---
# 112156
stored field under object with store_array_source:
171 changes: 138 additions & 33 deletions server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
@@ -16,7 +16,6 @@
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.fielddata.FieldDataContext;
@@ -36,13 +35,16 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;

import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
@@ -118,8 +120,7 @@ public String documentDescription() {
};
}

private static void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers, DocumentParserContext context) {

private void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers, DocumentParserContext context) {
try {
final boolean emptyDoc = isEmptyDoc(context.root(), context.parser());

@@ -147,6 +148,9 @@ private static void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers, DocumentParserContext context) {

executeIndexTimeScripts(context);

// Record additional entries for {@link IgnoredSourceFieldMapper} before calling #postParse, so that they get stored.
addIgnoredSourceMissingValues(context);

for (MetadataFieldMapper metadataMapper : metadataFieldsMappers) {
metadataMapper.postParse(context);
}
@@ -155,6 +159,122 @@ private static void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers, DocumentParserContext context) {
}
}

private void addIgnoredSourceMissingValues(DocumentParserContext context) throws IOException {
Collection<IgnoredSourceFieldMapper.NameValue> ignoredFieldsMissingValues = context.getIgnoredFieldsMissingValues();
if (ignoredFieldsMissingValues.isEmpty()) {
return;
}

assert context.mappingLookup().isSourceSynthetic();
try (
XContentParser parser = XContentHelper.createParser(
parserConfiguration,
context.sourceToParse().source(),
context.sourceToParse().getXContentType()
)
) {
DocumentParserContext newContext = new RootDocumentParserContext(
context.mappingLookup(),
mappingParserContext,
context.sourceToParse(),
parser
);
var nameValues = parseDocForMissingValues(newContext, ignoredFieldsMissingValues);
for (var nameValue : nameValues) {
context.addIgnoredField(nameValue);
}
}
}

/**
* Simplified parsing version for retrieving the source of a given set of fields.
*/
private static List<IgnoredSourceFieldMapper.NameValue> parseDocForMissingValues(
DocumentParserContext context,
Collection<IgnoredSourceFieldMapper.NameValue> ignoredFieldsMissingValues
) throws IOException {
// Maps full name to the corresponding NameValue entry.
Map<String, IgnoredSourceFieldMapper.NameValue> fields = new HashMap<>(ignoredFieldsMissingValues.size());
for (var field : ignoredFieldsMissingValues) {
fields.put(field.name(), field);
}

// Generate all possible parent names for the given fields.
// This is used to skip processing objects that can't generate missing values.
Set<String> parentNames = getPossibleParentNames(fields.keySet());
List<IgnoredSourceFieldMapper.NameValue> result = new ArrayList<>();

XContentParser parser = context.parser();
XContentParser.Token currentToken = parser.nextToken();
List<String> path = new ArrayList<>();
String fieldName = null;
while (currentToken != null) {
while (currentToken != XContentParser.Token.FIELD_NAME) {
if (fieldName != null
&& (currentToken == XContentParser.Token.START_OBJECT || currentToken == XContentParser.Token.START_ARRAY)) {
if (parentNames.contains(getCurrentPath(path, fieldName)) == false) {
// No missing value under this parsing subtree, skip it.
parser.skipChildren();
} else {
path.add(fieldName);
}
fieldName = null;
} else if (currentToken == XContentParser.Token.END_OBJECT || currentToken == XContentParser.Token.END_ARRAY) {
if (currentToken == XContentParser.Token.END_OBJECT && path.isEmpty() == false) {
path.removeLast();
}
fieldName = null;
}
currentToken = parser.nextToken();
if (currentToken == null) {
return result;
}
}
fieldName = parser.currentName();
String fullName = getCurrentPath(path, fieldName);
var leaf = fields.get(fullName); // There may be multiple matches for array elements, don't use #remove.
if (leaf != null) {
parser.nextToken(); // Advance the parser to the value to be read.
result.add(leaf.cloneWithValue(XContentDataHelper.encodeToken(parser)));
parser.nextToken(); // Skip the token ending the value.
fieldName = null;
}
currentToken = parser.nextToken();
}
return result;
}

private static String getCurrentPath(List<String> path, String fieldName) {
assert fieldName != null;
return path.isEmpty() ? fieldName : String.join(".", path) + "." + fieldName;
}

/**
* Generates all possible parent object names for the given full names.
* For instance, for input ['path.to.foo', 'another.path.to.bar'], it returns:
* [ 'path', 'path.to', 'another', 'another.path', 'another.path.to' ]
*/
private static Set<String> getPossibleParentNames(Set<String> fullPaths) {
if (fullPaths.isEmpty()) {
return Collections.emptySet();
}
Set<String> paths = new HashSet<>();
for (String fullPath : fullPaths) {
String[] split = fullPath.split("\\.");
if (split.length < 2) {
continue;
}
StringBuilder builder = new StringBuilder(split[0]);
paths.add(builder.toString());
for (int i = 1; i < split.length - 1; i++) {
builder.append(".");
builder.append(split[i]);
paths.add(builder.toString());
}
}
return paths;
}

private static void executeIndexTimeScripts(DocumentParserContext context) {
List<FieldMapper> indexTimeScriptMappers = context.mappingLookup().indexTimeScriptMappers();
if (indexTimeScriptMappers.isEmpty()) {
@@ -299,16 +419,14 @@ static void parseObjectOrNested(DocumentParserContext context) throws IOException {
if (context.parent().isNested()) {
// Handle a nested object that doesn't contain an array. Arrays are handled in #parseNonDynamicArray.
if (context.parent().storeArraySource() && context.canAddIgnoredField()) {
Tuple<DocumentParserContext, XContentBuilder> tuple = XContentDataHelper.cloneSubContext(context);
context.addIgnoredField(
context = context.addIgnoredFieldFromContext(
new IgnoredSourceFieldMapper.NameValue(
context.parent().fullPath(),
context.parent().fullPath().lastIndexOf(context.parent().leafName()),
XContentDataHelper.encodeXContentBuilder(tuple.v2()),
null,
context.doc()
)
);
context = tuple.v1();
token = context.parser().currentToken();
parser = context.parser();
}
@@ -445,17 +563,10 @@ static void parseObjectOrField(DocumentParserContext context, Mapper mapper) throws IOException {
if (context.canAddIgnoredField()
&& (fieldMapper.syntheticSourceMode() == FieldMapper.SyntheticSourceMode.FALLBACK
|| (context.isWithinCopyTo() == false && context.isCopyToDestinationField(mapper.fullPath())))) {
Tuple<DocumentParserContext, XContentBuilder> contextWithSourceToStore = XContentDataHelper.cloneSubContext(context);

context.addIgnoredField(
IgnoredSourceFieldMapper.NameValue.fromContext(
context,
fieldMapper.fullPath(),
XContentDataHelper.encodeXContentBuilder(contextWithSourceToStore.v2())
)
context = context.addIgnoredFieldFromContext(
IgnoredSourceFieldMapper.NameValue.fromContext(context, fieldMapper.fullPath(), null)
);

fieldMapper.parse(contextWithSourceToStore.v1());
fieldMapper.parse(context);
} else {
fieldMapper.parse(context);
}
@@ -553,16 +664,9 @@ private static void parseObjectDynamic(DocumentParserContext context, String currentFieldName) throws IOException {
// hence we don't dynamically create empty objects under properties, but rather carry around an artificial object mapper
dynamicObjectMapper = new NoOpObjectMapper(currentFieldName, context.path().pathAsText(currentFieldName));
if (context.canAddIgnoredField()) {
// Clone the DocumentParserContext to parse its subtree twice.
Tuple<DocumentParserContext, XContentBuilder> tuple = XContentDataHelper.cloneSubContext(context);
context.addIgnoredField(
IgnoredSourceFieldMapper.NameValue.fromContext(
context,
context.path().pathAsText(currentFieldName),
XContentDataHelper.encodeXContentBuilder(tuple.v2())
)
context = context.addIgnoredFieldFromContext(
IgnoredSourceFieldMapper.NameValue.fromContext(context, context.path().pathAsText(currentFieldName), null)
);
context = tuple.v1();
}
} else {
dynamicObjectMapper = DynamicFieldsBuilder.createDynamicObjectMapper(context, currentFieldName);
@@ -686,10 +790,10 @@ private static void parseNonDynamicArray(
final String lastFieldName,
String arrayFieldName
) throws IOException {
String fullPath = context.path().pathAsText(arrayFieldName);

// Check if we need to record the array source. This only applies to synthetic source.
if (context.canAddIgnoredField()) {
String fullPath = context.path().pathAsText(arrayFieldName);

boolean objectRequiresStoringSource = mapper instanceof ObjectMapper objectMapper
&& (objectMapper.storeArraySource()
|| (context.sourceKeepModeFromIndexSettings() == Mapper.SourceKeepMode.ARRAYS
Expand All @@ -706,11 +810,7 @@ private static void parseNonDynamicArray(
|| dynamicRuntimeContext
|| fieldWithStoredArraySource
|| copyToFieldHasValuesInDocument) {
Tuple<DocumentParserContext, XContentBuilder> tuple = XContentDataHelper.cloneSubContext(context);
context.addIgnoredField(
IgnoredSourceFieldMapper.NameValue.fromContext(context, fullPath, XContentDataHelper.encodeXContentBuilder(tuple.v2()))
);
context = tuple.v1();
context = context.addIgnoredFieldFromContext(IgnoredSourceFieldMapper.NameValue.fromContext(context, fullPath, null));
} else if (mapper instanceof ObjectMapper objectMapper
&& (objectMapper.isEnabled() == false || objectMapper.dynamic == ObjectMapper.Dynamic.FALSE)) {
context.addIgnoredField(
Expand All @@ -720,6 +820,11 @@ private static void parseNonDynamicArray(
}
}

// In synthetic source, if any array element requires storing its source as-is, it takes precedence over
// elements from regular source loading that are then skipped from the synthesized array source.
// To prevent this, we track each array name, to check if it contains any sub-arrays in its elements.
context = context.cloneForArray(fullPath);

XContentParser parser = context.parser();
XContentParser.Token token;
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
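The second parsing pass added in this commit stays cheap because of getPossibleParentNames: parseDocForMissingValues skips any object or array subtree (via parser.skipChildren()) whose dotted path cannot be a parent of a field that still needs its value. A stand-alone sketch of the prefix computation (illustrative; it mirrors the logic of the method above but is not the repository code):

import java.util.HashSet;
import java.util.Set;

public class ParentNameDemo {
    // For each dotted field name, emit every proper prefix; a subtree
    // whose path is not in this set cannot contain a wanted field,
    // so the second parsing pass can skip it wholesale.
    static Set<String> possibleParents(Set<String> fullPaths) {
        Set<String> parents = new HashSet<>();
        for (String fullPath : fullPaths) {
            String[] segments = fullPath.split("\\.");
            StringBuilder prefix = new StringBuilder();
            // Stop before the last segment: the leaf itself is not a parent.
            for (int i = 0; i < segments.length - 1; i++) {
                if (i > 0) {
                    prefix.append('.');
                }
                prefix.append(segments[i]);
                parents.add(prefix.toString());
            }
        }
        return parents;
    }

    public static void main(String[] args) {
        System.out.println(possibleParents(Set.of("path.to.foo", "another.path.to.bar")));
        // Contains: path, path.to, another, another.path, another.path.to
        // (set iteration order unspecified)
    }
}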
