Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimize source filtering in SourceFieldMapper #81970

Merged
merged 13 commits into from
Jan 12, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,31 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.CheckedBiFunction;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.Set;

public class SourceFieldMapper extends MetadataFieldMapper {

public static final String NAME = "_source";
public static final String RECOVERY_SOURCE_NAME = "_recovery_source";

public static final String CONTENT_TYPE = "_source";
private final Function<Map<String, ?>, Map<String, Object>> filter;
private final CheckedBiFunction<BytesReference, XContentType, BytesReference, IOException> filter;

private static final SourceFieldMapper DEFAULT = new SourceFieldMapper(Defaults.ENABLED, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY);

Expand Down Expand Up @@ -145,7 +142,18 @@ private SourceFieldMapper(boolean enabled, String[] includes, String[] excludes)
this.includes = includes;
this.excludes = excludes;
final boolean filtered = CollectionUtils.isEmpty(includes) == false || CollectionUtils.isEmpty(excludes) == false;
this.filter = enabled && filtered ? XContentMapValues.filter(includes, excludes) : null;
if (enabled && filtered) {
final XContentParserConfiguration parserConfig = XContentParserConfiguration.EMPTY.withFiltering(Set.of(includes), Set.of(excludes));
this.filter = (sourceBytes, contentType) -> {
BytesStreamOutput streamOutput = new BytesStreamOutput(Math.min(1024, sourceBytes.length()));
XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), streamOutput);
XContentParser parser = XContentType.JSON.xContent().createParser(parserConfig, sourceBytes.streamInput());
builder.copyCurrentStructure(parser);
return BytesReference.bytes(builder);
mushao999 marked this conversation as resolved.
Show resolved Hide resolved
};
} else {
this.filter = (sourceBytes, contentType) -> sourceBytes;
}
this.complete = enabled && CollectionUtils.isEmpty(includes) && CollectionUtils.isEmpty(excludes);
}

Expand Down Expand Up @@ -180,18 +188,7 @@ public void preParse(DocumentParserContext context) throws IOException {
public BytesReference applyFilters(@Nullable BytesReference originalSource, @Nullable XContentType contentType) throws IOException {
if (enabled && originalSource != null) {
// The percolate and term vectors APIs may not set the source, and that is ok because these APIs will not index any data
if (filter != null) {
// we don't update the context source if we filter, we want to keep it as is...
Tuple<XContentType, Map<String, Object>> mapTuple = XContentHelper.convertToMap(originalSource, true, contentType);
Map<String, Object> filteredSource = filter.apply(mapTuple.v2());
BytesStreamOutput bStream = new BytesStreamOutput();
XContentType actualContentType = mapTuple.v1();
XContentBuilder builder = XContentFactory.contentBuilder(actualContentType, bStream).map(filteredSource);
builder.close();
return bStream.bytes();
} else {
return originalSource;
}
return filter.apply(originalSource, contentType);
} else {
return null;
}
Expand Down