Skip to content

Commit

Permalink
Star tree codec changes
Browse files Browse the repository at this point in the history
Signed-off-by: Bharathwaj G <bharath78910@gmail.com>
  • Loading branch information
bharath-techie committed Jun 24, 2024
1 parent 9729a92 commit e797a2d
Show file tree
Hide file tree
Showing 12 changed files with 558 additions and 4 deletions.
19 changes: 15 additions & 4 deletions server/src/main/java/org/opensearch/index/codec/CodecService.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.opensearch.common.Nullable;
import org.opensearch.common.collect.MapBuilder;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.codec.composite.Composite99Codec;
import org.opensearch.index.mapper.MapperService;

import java.util.Map;
Expand Down Expand Up @@ -73,10 +74,20 @@ public CodecService(@Nullable MapperService mapperService, IndexSettings indexSe
codecs.put(BEST_COMPRESSION_CODEC, new Lucene99Codec(Mode.BEST_COMPRESSION));
codecs.put(ZLIB, new Lucene99Codec(Mode.BEST_COMPRESSION));
} else {
codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger));
codecs.put(LZ4, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger));
codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger));
codecs.put(ZLIB, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger));
// CompositeCodec still delegates to PerFieldMappingPostingFormatCodec
// We can still support all the compression codecs when composite index is present
// hence we're defining the codecs like below
if (mapperService.isCompositeIndexPresent()) {
codecs.put(DEFAULT_CODEC, new Composite99Codec(Mode.BEST_SPEED, mapperService, logger));
codecs.put(LZ4, new Composite99Codec(Mode.BEST_SPEED, mapperService, logger));
codecs.put(BEST_COMPRESSION_CODEC, new Composite99Codec(Mode.BEST_COMPRESSION, mapperService, logger));
codecs.put(ZLIB, new Composite99Codec(Mode.BEST_COMPRESSION, mapperService, logger));
} else {
codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger));
codecs.put(LZ4, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger));
codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger));
codecs.put(ZLIB, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger));
}
}
codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault());
for (String codec : Codec.availableCodecs()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.composite;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.mapper.MapperService;

import java.io.IOException;

/**
 * Doc values format for composite indices. Reading and writing of the regular doc values is
 * handed to a wrapped {@link DocValuesFormat}; the consumer and producer created here wrap the
 * delegate's consumer and producer in {@link Composite90DocValuesWriter} and
 * {@link Composite90DocValuesReader} respectively.
 *
 * @opensearch.experimental
 */
@ExperimentalApi
public class Composite90DocValuesFormat extends DocValuesFormat {

    /** Format that performs the actual doc values encoding and decoding. */
    private final DocValuesFormat delegate;

    /** Mapper service handed to the writer and reader; {@code null} when created through SPI. */
    private final MapperService mapperService;

    /**
     * No-arg constructor needed for SPI. Wraps a {@link Lucene90DocValuesFormat} and carries no
     * mapper service.
     */
    public Composite90DocValuesFormat() {
        this((MapperService) null);
    }

    /**
     * Creates a format that wraps a {@link Lucene90DocValuesFormat}.
     *
     * @param mapperService mapper service instance passed on to the writer and reader
     */
    public Composite90DocValuesFormat(MapperService mapperService) {
        this(new Lucene90DocValuesFormat(), mapperService);
    }

    /**
     * Creates a new docvalues format registered under the delegate's name.
     *
     * <p>The provided name will be written into the index segment in some configurations (such as
     * when using {@code PerFieldDocValuesFormat}): in such configurations, for the segment to be read
     * this class should be registered with Java's SPI mechanism (registered in META-INF/ of your jar
     * file, etc).
     *
     * @param delegate      format that handles the underlying doc values
     * @param mapperService mapper service instance passed on to the writer and reader
     */
    public Composite90DocValuesFormat(DocValuesFormat delegate, MapperService mapperService) {
        super(delegate.getName());
        this.mapperService = mapperService;
        this.delegate = delegate;
    }

    /**
     * Wraps the delegate's consumer in a {@link Composite90DocValuesWriter}.
     *
     * @param state write state of the segment being written
     * @return the composite-aware doc values consumer
     * @throws IOException if the delegate consumer or the writer cannot be created
     */
    @Override
    public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
        final DocValuesConsumer delegateConsumer = delegate.fieldsConsumer(state);
        return new Composite90DocValuesWriter(delegateConsumer, state, mapperService);
    }

    /**
     * Wraps the delegate's producer in a {@link Composite90DocValuesReader}.
     *
     * @param state read state of the segment being opened
     * @return the composite-aware doc values producer
     * @throws IOException if the delegate producer or the reader cannot be created
     */
    @Override
    public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
        final DocValuesProducer delegateProducer = delegate.fieldsProducer(state);
        return new Composite90DocValuesReader(delegateProducer, state, mapperService);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.composite;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.mapper.CompositeMappedFieldType;
import org.opensearch.index.mapper.MapperService;

import java.io.IOException;
import java.util.List;
import java.util.Set;

/**
 * Reader for star tree index and star tree doc values from the segments.
 *
 * <p>All regular doc values lookups are forwarded to the wrapped {@link DocValuesProducer}. Reading
 * of the composite index (star tree) files is not implemented yet; the composite-specific methods
 * are placeholders.
 *
 * @opensearch.experimental
 */
@ExperimentalApi
public class Composite90DocValuesReader extends DocValuesProducer implements CompositeIndexReader {
    /** Producer that serves the regular doc values. */
    private final DocValuesProducer delegate;
    Set<CompositeMappedFieldType> compositeMappedFieldTypes;
    MapperService mapperService;

    /**
     * Creates a reader on top of the given producer.
     *
     * @param producer      producer that serves the regular doc values
     * @param state         read state of the segment being opened (not used yet)
     * @param mapperService mapper service used to look up the composite field types; may be
     *                      {@code null}, in which case no composite field types are known
     * @throws IOException declared for the upcoming star tree file reads
     */
    public Composite90DocValuesReader(DocValuesProducer producer, SegmentReadState state, MapperService mapperService) throws IOException {
        this.delegate = producer;
        this.mapperService = mapperService;
        // The SPI-created format passes a null mapper service; dereferencing it here would make
        // every segment opened through that path fail with a NullPointerException.
        if (mapperService == null) {
            this.compositeMappedFieldTypes = Set.of();
        } else {
            this.compositeMappedFieldTypes = mapperService.getCompositeFieldTypes();
        }
        // TODO : read star tree files
    }

    @Override
    public NumericDocValues getNumeric(FieldInfo field) throws IOException {
        return delegate.getNumeric(field);
    }

    @Override
    public BinaryDocValues getBinary(FieldInfo field) throws IOException {
        return delegate.getBinary(field);
    }

    @Override
    public SortedDocValues getSorted(FieldInfo field) throws IOException {
        return delegate.getSorted(field);
    }

    @Override
    public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
        return delegate.getSortedNumeric(field);
    }

    @Override
    public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
        return delegate.getSortedSet(field);
    }

    @Override
    public void checkIntegrity() throws IOException {
        delegate.checkIntegrity();
        // TODO : check integrity of composite index related [star tree] files
    }

    @Override
    public void close() throws IOException {
        delegate.close();
        // TODO : close composite index related [star tree] files
    }

    /**
     * Returns the names of the composite index fields of this segment.
     *
     * @return currently always an empty list, because the star tree files are not read yet
     */
    @Override
    public List<String> getCompositeIndexFields() {
        // TODO : read from file formats and get the field names.
        // Return an empty list rather than null so callers can iterate without a null check.
        return List.of();
    }

    /**
     * Returns the composite index values of the given field.
     *
     * @param field     name of the composite field
     * @param fieldType type of the composite field
     * @return currently always {@code null}, because the star tree files are not read yet
     * @throws IOException declared for the upcoming star tree file reads
     */
    @Override
    public CompositeIndexValues getCompositeIndexValues(String field, CompositeMappedFieldType.CompositeFieldType fieldType)
        throws IOException {
        // TODO : read compositeIndexValues [starTreeValues] from star tree files
        return null;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.composite;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentWriteState;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.mapper.CompositeMappedFieldType;
import org.opensearch.index.mapper.MapperService;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * This class writes the star tree index and star tree doc values
 * based on the doc values structures of the original index.
 *
 * <p>Every doc values field is first written through the wrapped {@link DocValuesConsumer}. For
 * sorted numeric fields written during flush, the producer and field info are additionally
 * collected so that the composite indices can be built once all required fields were seen.
 *
 * @opensearch.experimental
 */
@ExperimentalApi
public class Composite90DocValuesWriter extends DocValuesConsumer {
    private final DocValuesConsumer delegate;
    private final SegmentWriteState state;
    private final MapperService mapperService;
    // Set by merge(); stays null for the flush flow.
    private MergeState mergeState = null;
    private final Set<CompositeMappedFieldType> compositeMappedFieldTypes;
    // Names that still have to be seen before the composite indices can be built.
    private final Set<String> compositeFieldSet;

    private final Map<String, DocValuesProducer> fieldProducerMap = new HashMap<>();
    private final Map<String, FieldInfo> fieldToFieldInfoMap = new HashMap<>();

    /**
     * Creates a writer on top of the given consumer.
     *
     * @param delegate          consumer that writes the regular doc values
     * @param segmentWriteState write state of the segment being written
     * @param mapperService     mapper service used to look up the composite field types; may be
     *                          {@code null}, in which case no composite index is built
     * @throws IOException declared for the upcoming star tree file writes
     */
    public Composite90DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState, MapperService mapperService)
        throws IOException {

        this.delegate = delegate;
        this.state = segmentWriteState;
        this.mapperService = mapperService;
        // The SPI-created format passes a null mapper service; treat that as "no composite fields"
        // instead of failing with a NullPointerException.
        if (mapperService == null) {
            this.compositeMappedFieldTypes = Set.of();
        } else {
            this.compositeMappedFieldTypes = mapperService.getCompositeFieldTypes();
        }
        compositeFieldSet = new HashSet<>();
        for (CompositeMappedFieldType type : compositeMappedFieldTypes) {
            compositeFieldSet.add(type.name());
        }
    }

    @Override
    public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
        delegate.addNumericField(field, valuesProducer);
    }

    @Override
    public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
        delegate.addBinaryField(field, valuesProducer);
    }

    @Override
    public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
        delegate.addSortedField(field, valuesProducer);
    }

    @Override
    public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
        delegate.addSortedNumericField(field, valuesProducer);
        // Perform this only during flush flow
        if (mergeState == null) {
            createCompositeIndicesIfPossible(valuesProducer, field);
        }
    }

    @Override
    public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
        delegate.addSortedSetField(field, valuesProducer);
    }

    /**
     * Closes the wrapped consumer so the regular doc values files are finished and released.
     *
     * @throws IOException if closing the wrapped consumer fails
     */
    @Override
    public void close() throws IOException {
        delegate.close();
        // TODO : close composite index related [star tree] files once they are written
    }

    /**
     * Records the producer and field info of a composite field and, once every awaited name has
     * been seen, walks the composite field types to build their indices.
     *
     * @param valuesProducer producer of the field's doc values
     * @param field          field that was just written
     * @throws IOException declared for the upcoming star tree build
     */
    private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer, FieldInfo field) throws IOException {
        if (compositeFieldSet.isEmpty()) {
            return;
        }
        // NOTE(review): the awaited names come from CompositeMappedFieldType#name() and are matched
        // against doc values field names here - confirm these are meant to be the same names.
        if (compositeFieldSet.contains(field.name)) {
            fieldProducerMap.put(field.name, valuesProducer);
            fieldToFieldInfoMap.put(field.name, field);
            compositeFieldSet.remove(field.name);
        }
        // we have all the required fields to build composite fields
        if (compositeFieldSet.isEmpty()) {
            for (CompositeMappedFieldType mappedType : compositeMappedFieldTypes) {
                if (mappedType.getCompositeIndexType().equals(CompositeMappedFieldType.CompositeFieldType.STAR_TREE)) {
                    // TODO : Call StarTree builder
                }
            }
        }
    }

    @Override
    public void merge(MergeState mergeState) throws IOException {
        // TODO : check if class variable will cause concurrency issues
        // Remember the merge state first so addSortedNumericField skips the flush-only path.
        this.mergeState = mergeState;
        super.merge(mergeState);
        // TODO : handle merge star tree
        // mergeStarTreeFields(mergeState);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.composite;

import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec;
import org.opensearch.index.mapper.MapperService;

/**
 * Extends the Codec to support new file formats for composite indices eg: star tree index
 * based on the mappings.
 *
 * <p>Everything except the doc values format is handled by the wrapped codec; doc values go
 * through a {@link Composite90DocValuesFormat}.
 *
 * @opensearch.experimental
 */
@ExperimentalApi
public class Composite99Codec extends FilterCodec {
    public static final String COMPOSITE_INDEX_CODEC_NAME = "Composite99Codec";
    private final MapperService mapperService;
    // Built once per codec instance instead of on every docValuesFormat() call.
    private final DocValuesFormat compositeDocValuesFormat;

    /**
     * No-arg constructor that wraps a default {@link Lucene99Codec} and carries no mapper service.
     */
    public Composite99Codec() {
        this(COMPOSITE_INDEX_CODEC_NAME, new Lucene99Codec(), null);
    }

    /**
     * Creates a composite codec that wraps a {@link PerFieldMappingPostingFormatCodec}.
     *
     * @param compressionMode compression mode passed to the wrapped codec
     * @param mapperService   mapper service instance
     * @param logger          logger passed to the wrapped codec
     */
    public Composite99Codec(Lucene99Codec.Mode compressionMode, MapperService mapperService, Logger logger) {
        this(COMPOSITE_INDEX_CODEC_NAME, new PerFieldMappingPostingFormatCodec(compressionMode, mapperService, logger), mapperService);
    }

    /**
     * Sole constructor. When subclassing this codec, create a no-arg ctor and pass the delegate codec and a unique name to
     * this ctor.
     *
     * @param name name of the codec
     * @param delegate codec delegate
     * @param mapperService mapper service instance
     */
    protected Composite99Codec(String name, Codec delegate, MapperService mapperService) {
        super(name, delegate);
        this.mapperService = mapperService;
        this.compositeDocValuesFormat = new Composite90DocValuesFormat(mapperService);
    }

    /**
     * Returns the composite doc values format of this codec.
     *
     * @return the {@link Composite90DocValuesFormat} created for this codec instance
     */
    @Override
    public DocValuesFormat docValuesFormat() {
        return compositeDocValuesFormat;
    }
}
Loading

0 comments on commit e797a2d

Please sign in to comment.