Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Construct dynamic updates directly via object builders #81449

Merged
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
f54f87a
WIP
romseygeek Oct 7, 2021
578179d
Merge remote-tracking branch 'origin/master' into mapper/dots-in-leaf…
romseygeek Oct 11, 2021
28233e3
Add 'flatten' option to ObjectMapper
romseygeek Oct 12, 2021
2b8b94f
Merge remote-tracking branch 'origin/master' into mapper/dots-in-leaf…
romseygeek Oct 12, 2021
75ca57c
tidying up
romseygeek Oct 12, 2021
4ee36b4
tidy up
romseygeek Oct 13, 2021
81a3613
Merge remote-tracking branch 'origin/master' into mapper/dots-in-leaf…
romseygeek Oct 13, 2021
4a47404
Merge remote-tracking branch 'origin/master' into mapper/dots-in-leaf…
romseygeek Oct 18, 2021
4e977bf
Merge remote-tracking branch 'origin/master' into mapper/dynamic-upda…
romseygeek Dec 7, 2021
f328c83
Merge remote-tracking branch 'origin/master' into mapper/dynamic-upda…
romseygeek Dec 7, 2021
0078b19
spotless
romseygeek Dec 7, 2021
ec17691
Merge remote-tracking branch 'origin/master' into mapper/dynamic-upda…
romseygeek Dec 7, 2021
cc75426
remove copyonwritehashmap from ObjectMapper
romseygeek Dec 7, 2021
39ade03
oops
romseygeek Dec 8, 2021
4c362c6
Merge branch 'master' into mapper/dynamic-update-without-merging
elasticmachine Dec 8, 2021
087886a
spotless
romseygeek Dec 8, 2021
824f12c
Merge remote-tracking branch 'origin/master' into mapper/dynamic-upda…
romseygeek Dec 15, 2021
005079f
Merge remote-tracking branch 'origin/master' into mapper/dynamic-upda…
romseygeek Dec 20, 2021
9f4da05
deef
romseygeek Dec 20, 2021
b951025
Merge remote-tracking branch 'origin/master' into mapper/dynamic-upda…
romseygeek Jan 6, 2022
6f1f42c
deef
romseygeek Jan 6, 2022
b8d4d18
enforce that dynamic intermediate objects have been created already; …
romseygeek Jan 6, 2022
6e94b9b
Merge remote-tracking branch 'origin/master' into mapper/dynamic-upda…
romseygeek Jan 10, 2022
0832808
Merge remote-tracking branch 'origin/master' into mapper/dynamic-upda…
romseygeek Jan 10, 2022
efc371f
spotless
romseygeek Jan 10, 2022
025be06
Add back check for empty field name
romseygeek Jan 12, 2022
8dc3fcf
Merge remote-tracking branch 'origin/master' into mapper/dynamic-upda…
romseygeek Jan 12, 2022
660d3db
Merge branch 'master' into mapper/dynamic-update-without-merging
elasticmachine Jan 18, 2022
be672cc
Merge branch 'master' into mapper/dynamic-update-without-merging
elasticmachine Jan 25, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ private void expandDots() throws IOException {
if (subpaths.length == 0) {
throw new IllegalArgumentException("field name cannot contain only dots: [" + field + "]");
}
if (subpaths.length == 1) {
// Corner case: if the input has a single trailing '.', eg 'field.', then we will get a single
// subpath due to the way String.split() works. We can only return fast here if this is not
// the case
// TODO make this case throw an error instead? https://github.com/elastic/elasticsearch/issues/28948
if (subpaths.length == 1 && field.endsWith(".") == false) {
return;
}
Token token = delegate().nextToken();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ public void testEmbeddedValue() throws IOException {

}

public void testTrailingDotsAreStripped() throws IOException {

assertXContentMatches("""
{"test":{"with":{"dots":"value"}},"nodots":"value"}""", """
{"test.":{"with.":{"dots":"value"}},"nodots":"value"}""");

}

public void testSkipChildren() throws IOException {
XContentParser parser = DotExpandingXContentParser.expandDots(createParser(JsonXContent.jsonXContent, """
{ "test.with.dots" : "value", "nodots" : "value2" }"""));
Expand Down
209 changes: 17 additions & 192 deletions server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.apache.lucene.search.Query;
import org.elasticsearch.Version;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.xcontent.XContentHelper;
Expand All @@ -33,7 +32,6 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
Expand Down Expand Up @@ -97,7 +95,7 @@ public ParsedDocument parseDocument(SourceToParse source, MappingLookup mappingL
context.reorderParentAndGetDocs(),
context.sourceToParse().source(),
context.sourceToParse().getXContentType(),
createDynamicUpdate(mappingLookup, context.getDynamicMappers(), context.getDynamicRuntimeFields())
createDynamicUpdate(context)
);
}

Expand Down Expand Up @@ -207,198 +205,20 @@ private static MapperParsingException wrapInMapperParsingException(SourceToParse
return new MapperParsingException("failed to parse", e);
}

private static String[] splitAndValidatePath(String fullFieldPath) {
if (fullFieldPath.contains(".")) {
String[] parts = fullFieldPath.split("\\.");
if (parts.length == 0) {
throw new IllegalArgumentException("field name cannot contain only dots");
}
for (String part : parts) {
if (Strings.hasText(part) == false) {
// check if the field name contains only whitespace
if (Strings.isEmpty(part) == false) {
throw new IllegalArgumentException("object field cannot contain only whitespace: ['" + fullFieldPath + "']");
}
throw new IllegalArgumentException(
"object field starting or ending with a [.] makes object resolution ambiguous: [" + fullFieldPath + "]"
);
}
}
return parts;
} else {
if (Strings.isEmpty(fullFieldPath)) {
throw new IllegalArgumentException("field name cannot be an empty string");
}
return new String[] { fullFieldPath };
}
}

/**
* Creates a Mapping containing any dynamically added fields, or returns null if there were no dynamic mappings.
*/
static Mapping createDynamicUpdate(MappingLookup mappingLookup, List<Mapper> dynamicMappers, List<RuntimeField> dynamicRuntimeFields) {
if (dynamicMappers.isEmpty() && dynamicRuntimeFields.isEmpty()) {
static Mapping createDynamicUpdate(DocumentParserContext context) {
if (context.getDynamicMappers().isEmpty() && context.getDynamicRuntimeFields().isEmpty()) {
return null;
}
RootObjectMapper root;
if (dynamicMappers.isEmpty() == false) {
root = createDynamicUpdate(mappingLookup, dynamicMappers);
root.fixRedundantIncludes();
} else {
root = mappingLookup.getMapping().getRoot().copyAndReset();
}
root.addRuntimeFields(dynamicRuntimeFields);
return mappingLookup.getMapping().mappingUpdate(root);
}

private static RootObjectMapper createDynamicUpdate(MappingLookup mappingLookup, List<Mapper> dynamicMappers) {

// We build a mapping by first sorting the mappers, so that all mappers containing a common prefix
// will be processed in a contiguous block. When the prefix is no longer seen, we pop the extra elements
// off the stack, merging them upwards into the existing mappers.
dynamicMappers.sort(Comparator.comparing(Mapper::name));
Iterator<Mapper> dynamicMapperItr = dynamicMappers.iterator();
List<ObjectMapper> parentMappers = new ArrayList<>();
Mapper firstUpdate = dynamicMapperItr.next();
parentMappers.add(createUpdate(mappingLookup.getMapping().getRoot(), splitAndValidatePath(firstUpdate.name()), 0, firstUpdate));
Mapper previousMapper = null;
while (dynamicMapperItr.hasNext()) {
Mapper newMapper = dynamicMapperItr.next();
if (previousMapper != null && newMapper.name().equals(previousMapper.name())) {
// We can see the same mapper more than once, for example, if we had foo.bar and foo.baz, where
// foo did not yet exist. This will create 2 copies in dynamic mappings, which should be identical.
// Here we just skip over the duplicates, but we merge them to ensure there are no conflicts.
newMapper.merge(previousMapper);
continue;
}
previousMapper = newMapper;
String[] nameParts = splitAndValidatePath(newMapper.name());

// We first need the stack to only contain mappers in common with the previously processed mapper
// For example, if the first mapper processed was a.b.c, and we now have a.d, the stack will contain
// a.b, and we want to merge b back into the stack so it just contains a
int i = removeUncommonMappers(parentMappers, nameParts);

// Then we need to add back mappers that may already exist within the stack, but are not on it.
// For example, if we processed a.b, followed by an object mapper a.c.d, and now are adding a.c.d.e
// then the stack will only have a on it because we will have already merged a.c.d into the stack.
// So we need to pull a.c, followed by a.c.d, onto the stack so e can be added to the end.
i = expandCommonMappers(parentMappers, nameParts, i);

// If there are still parents of the new mapper which are not on the stack, we need to pull them
// from the existing mappings. In order to maintain the invariant that the stack only contains
// fields which are updated, we cannot simply add the existing mappers to the stack, since they
// may have other subfields which will not be updated. Instead, we pull the mapper from the existing
// mappings, and build an update with only the new mapper and its parents. This then becomes our
// "new mapper", and can be added to the stack.
if (i < nameParts.length - 1) {
newMapper = createExistingMapperUpdate(parentMappers, nameParts, i, mappingLookup, newMapper);
}

if (newMapper instanceof ObjectMapper) {
parentMappers.add((ObjectMapper) newMapper);
} else {
addToLastMapper(parentMappers, newMapper, true);
}
}
popMappers(parentMappers, 1, true);
assert parentMappers.size() == 1;
return (RootObjectMapper) parentMappers.get(0);
}

private static void popMappers(List<ObjectMapper> parentMappers, int keepBefore, boolean merge) {
assert keepBefore >= 1; // never remove the root mapper
// pop off parent mappers not needed by the current mapper,
// merging them backwards since they are immutable
for (int i = parentMappers.size() - 1; i >= keepBefore; --i) {
addToLastMapper(parentMappers, parentMappers.remove(i), merge);
RootObjectMapper.Builder rootBuilder = context.updateRoot();
for (Mapper mapper : context.getDynamicMappers()) {
rootBuilder.addDynamic(mapper.name(), null, mapper, context);
}
}

/**
* Adds a mapper as an update into the last mapper. If merge is true, the new mapper
* will be merged in with other child mappers of the last parent, otherwise it will be a new update.
*/
private static void addToLastMapper(List<ObjectMapper> parentMappers, Mapper mapper, boolean merge) {
assert parentMappers.size() >= 1;
int lastIndex = parentMappers.size() - 1;
ObjectMapper withNewMapper = parentMappers.get(lastIndex).mappingUpdate(mapper);
if (merge) {
withNewMapper = parentMappers.get(lastIndex).merge(withNewMapper);
}
parentMappers.set(lastIndex, withNewMapper);
}

/**
* Removes mappers that exist on the stack, but are not part of the path of the current nameParts,
* Returns the next unprocessed index from nameParts.
*/
private static int removeUncommonMappers(List<ObjectMapper> parentMappers, String[] nameParts) {
int keepBefore = 1;
while (keepBefore < parentMappers.size() && parentMappers.get(keepBefore).simpleName().equals(nameParts[keepBefore - 1])) {
++keepBefore;
for (RuntimeField runtimeField : context.getDynamicRuntimeFields()) {
rootBuilder.addRuntimeField(runtimeField);
}
popMappers(parentMappers, keepBefore, true);
return keepBefore - 1;
}

/**
* Adds mappers from the end of the stack that exist as updates within those mappers.
* Returns the next unprocessed index from nameParts.
*/
private static int expandCommonMappers(List<ObjectMapper> parentMappers, String[] nameParts, int i) {
ObjectMapper last = parentMappers.get(parentMappers.size() - 1);
while (i < nameParts.length - 1 && last.getMapper(nameParts[i]) != null) {
Mapper newLast = last.getMapper(nameParts[i]);
assert newLast instanceof ObjectMapper;
last = (ObjectMapper) newLast;
parentMappers.add(last);
++i;
}
return i;
}

/**
* Creates an update for intermediate object mappers that are not on the stack, but parents of newMapper.
*/
private static ObjectMapper createExistingMapperUpdate(
List<ObjectMapper> parentMappers,
String[] nameParts,
int i,
MappingLookup mappingLookup,
Mapper newMapper
) {
String updateParentName = nameParts[i];
final ObjectMapper lastParent = parentMappers.get(parentMappers.size() - 1);
if (parentMappers.size() > 1) {
// only prefix with parent mapper if the parent mapper isn't the root (which has a fake name)
updateParentName = lastParent.name() + '.' + nameParts[i];
}
ObjectMapper updateParent = mappingLookup.objectMappers().get(updateParentName);
assert updateParent != null : updateParentName + " doesn't exist";
return createUpdate(updateParent, nameParts, i + 1, newMapper);
}

/**
* Build an update for the parent which will contain the given mapper and any intermediate fields.
*/
private static ObjectMapper createUpdate(ObjectMapper parent, String[] nameParts, int i, Mapper mapper) {
List<ObjectMapper> parentMappers = new ArrayList<>();
ObjectMapper previousIntermediate = parent;
for (; i < nameParts.length - 1; ++i) {
Mapper intermediate = previousIntermediate.getMapper(nameParts[i]);
assert intermediate != null : "Field " + previousIntermediate.name() + " does not have a subfield " + nameParts[i];
assert intermediate instanceof ObjectMapper;
parentMappers.add((ObjectMapper) intermediate);
previousIntermediate = (ObjectMapper) intermediate;
}
if (parentMappers.isEmpty() == false) {
// add the new mapper to the stack, and pop down to the original parent level
addToLastMapper(parentMappers, mapper, false);
popMappers(parentMappers, 1, false);
mapper = parentMappers.get(0);
}
return parent.mappingUpdate(mapper);
RootObjectMapper root = rootBuilder.build(MapperBuilderContext.ROOT);
root.fixRedundantIncludes();
return context.mappingLookup().getMapping().mappingUpdate(root);
}

static void parseObjectOrNested(DocumentParserContext context, ObjectMapper mapper) throws IOException {
Expand Down Expand Up @@ -453,6 +273,11 @@ private static void innerParseObject(DocumentParserContext context, ObjectMapper
while (token != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = context.parser().currentName();
if (currentFieldName.isBlank()) {
throw new MapperParsingException(
"Field name cannot contain only whitespace: [" + context.path().pathAsText(currentFieldName) + "]"
);
}
} else if (token == XContentParser.Token.START_OBJECT) {
parseObject(context, mapper, currentFieldName);
} else if (token == XContentParser.Token.START_ARRAY) {
Expand Down Expand Up @@ -759,7 +584,7 @@ private static ObjectMapper.Dynamic dynamicOrDefault(ObjectMapper parentMapper,
parentMapper = context.mappingLookup().objectMappers().get(parentName);
if (parentMapper == null) {
// If parentMapper is null, it means the parent of the current mapper is being dynamically created right now
parentMapper = context.getObjectMapper(parentName);
parentMapper = context.getDynamicObjectMapper(parentName);
if (parentMapper == null) {
// it can still happen that the path is ambiguous and we are not able to locate the parent
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ public final boolean isShadowed(String field) {
return mappingLookup.isShadowed(field);
}

public final ObjectMapper getObjectMapper(String name) {
public final ObjectMapper getDynamicObjectMapper(String name) {
return dynamicObjectMappers.get(name);
}

Expand All @@ -250,6 +250,13 @@ public final List<RuntimeField> getDynamicRuntimeFields() {
*/
public abstract Iterable<LuceneDocument> nonRootDocuments();

/**
* @return a RootObjectMapper.Builder to be used to construct a dynamic mapping update
*/
public final RootObjectMapper.Builder updateRoot() {
return mappingLookup.getMapping().getRoot().newBuilder(indexSettings.getIndexVersionCreated());
}

public boolean isWithinCopyTo() {
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.elasticsearch.index.mapper;

import org.elasticsearch.common.Strings;
import org.elasticsearch.xcontent.ToXContentFragment;

import java.util.Map;
Expand Down Expand Up @@ -65,4 +66,9 @@ public final String simpleName() {
* @param mappers a {@link MappingLookup} that can produce references to other mappers
*/
public abstract void validate(MappingLookup mappers);

@Override
public String toString() {
return Strings.toString(this);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,16 @@ public Map<String, Mapper> getChildren() {
return Collections.unmodifiableMap(this.mappers);
}

@Override
public ObjectMapper.Builder newBuilder(Version indexVersionCreated) {
NestedObjectMapper.Builder builder = new NestedObjectMapper.Builder(simpleName(), indexVersionCreated);
builder.enabled = enabled;
builder.dynamic = dynamic;
builder.includeInRoot = includeInRoot;
builder.includeInParent = includeInParent;
return builder;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(simpleName());
Expand Down
Loading