From fea48471991b30d5980d20082860377c7af5ac6c Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Tue, 14 Jun 2022 07:08:45 +0000 Subject: [PATCH 01/25] Add experimental support for zstd and lz4 (native) compression. Add experimental support for zstd (with and without dictionary support) and lz4 (native) compressions as discussed in: https://github.com/opensearch-project/OpenSearch/issues/3354. Users would be able to set the index.codec setting with the values "lz4_native" (for lz4 native), "zstd" (for zstd with dictionary), and "zstd_no_dict" (for zstd without a dictionary). Signed-off-by: Mulugeta Mammo --- buildSrc/version.properties | 2 + server/build.gradle | 4 + .../experimental/LZ4CompressionMode.java | 172 +++++++++++++++ .../Lucene90CustomStoredFieldsFormat.java | 115 ++++++++++ .../experimental/Lucene92CustomCodec.java | 105 +++++++++ ...rFieldMappingPostingFormatCustomCodec.java | 51 +++++ .../experimental/ZstdCompressionMode.java | 205 ++++++++++++++++++ .../ZstdNoDictCompressionMode.java | 185 ++++++++++++++++ .../opensearch/index/codec/CodecService.java | 15 ++ .../opensearch/index/engine/EngineConfig.java | 3 + .../services/org.apache.lucene.codecs.Codec | 1 + 11 files changed, 858 insertions(+) create mode 100644 server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java create mode 100644 server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java create mode 100644 server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java create mode 100644 server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java create mode 100644 server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java create mode 100644 server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java create mode 100644 server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec diff --git 
a/buildSrc/version.properties b/buildSrc/version.properties index b43d9b1bce8d8..e16af92a03844 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -17,6 +17,8 @@ supercsv = 2.4.0 log4j = 2.17.1 slf4j = 1.6.2 asm = 9.3 +lz4 = 1.8.0 +zstd = 1.5.0-4 # when updating the JNA version, also update the version in buildSrc/build.gradle jna = 5.5.0 diff --git a/server/build.gradle b/server/build.gradle index 9d9d12e798eab..8f6ba220ec2c3 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -134,6 +134,10 @@ dependencies { // jna api "net.java.dev.jna:jna:${versions.jna}" + // lz4 native and zstd compressions + api "org.lz4:lz4-java:${versions.lz4}" + api "com.github.luben:zstd-jni:${versions.zstd}" + testImplementation(project(":test:framework")) { // tests use the locally compiled version of server exclude group: 'org.opensearch', module: 'server' diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java new file mode 100644 index 0000000000000..ec585421ea365 --- /dev/null +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java @@ -0,0 +1,172 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.apache.lucene.codecs.experimental;
+
+import java.io.IOException;
+import net.jpountz.lz4.LZ4Compressor;
+import net.jpountz.lz4.LZ4Factory;
+import net.jpountz.lz4.LZ4FastDecompressor;
+import org.apache.lucene.codecs.compressing.CompressionMode;
+import org.apache.lucene.codecs.compressing.Compressor;
+import org.apache.lucene.codecs.compressing.Decompressor;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * LZ4 JNI based compression mode.
+ *
+ * <p>Each chunk is split into at most {@code NUM_SUB_BLOCKS} sub-blocks that are compressed
+ * independently. The stream layout is: {@code vInt(blockLength)} followed, for every sub-block,
+ * by {@code vInt(compressedSize)} and the compressed bytes.
+ */
+public class LZ4CompressionMode extends CompressionMode {
+    private static final int NUM_SUB_BLOCKS = 10;
+
+    /** default constructor */
+    public LZ4CompressionMode() {}
+
+    @Override
+    public Compressor newCompressor() {
+        return new LZ4CompressionMode.LZ4InnerCompressor();
+    }
+
+    @Override
+    public Decompressor newDecompressor() {
+        return new LZ4CompressionMode.LZ4InnerDecompressor();
+    }
+
+    /** LZ4 compressor */
+    private static final class LZ4InnerCompressor extends Compressor {
+        private final LZ4Compressor compressor;
+        // scratch buffer reused across calls; grown on demand
+        private byte[] compressedBuffer;
+
+        /** Default constructor */
+        public LZ4InnerCompressor() {
+            compressedBuffer = BytesRef.EMPTY_BYTES;
+            compressor = LZ4Factory.nativeInstance().fastCompressor();
+        }
+
+        /**
+         * Compresses {@code len} bytes of {@code bytes} starting at {@code off} as a sequence of
+         * independently compressed sub-blocks.
+         *
+         * @param bytes source buffer
+         * @param off   offset of the first byte to compress
+         * @param len   number of bytes to compress
+         * @param out   destination of the block length header and the compressed sub-blocks
+         * @throws IOException if writing to {@code out} fails
+         */
+        @Override
+        public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
+            // ceil(len / NUM_SUB_BLOCKS); 0 only when len == 0, in which case the loop does not run
+            final int blockLength = (len + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS;
+            out.writeVInt(blockLength);
+
+            final int end = off + len;
+
+            for (int start = off; start < end; start += blockLength) {
+                final int l = Math.min(blockLength, end - start);
+
+                // defensive only: l is always >= 1 while start < end
+                if (l == 0) {
+                    out.writeVInt(0);
+                    return;
+                }
+
+                final int maxCompressedLength = compressor.maxCompressedLength(l);
+                compressedBuffer = ArrayUtil.grow(compressedBuffer, maxCompressedLength);
+
+                final int compressedSize = compressor.compress(bytes, start, l, compressedBuffer, 0, compressedBuffer.length);
+
+                out.writeVInt(compressedSize);
+                out.writeBytes(compressedBuffer, compressedSize);
+            }
+        }
+
+        @Override
+        public void close() throws IOException {}
+    }
+
+    // decompression
+
+    /** LZ4 decompressor */
+    private static final class LZ4InnerDecompressor extends Decompressor {
+
+        private final LZ4FastDecompressor decompressor;
+        // scratch buffer holding one compressed sub-block; grown on demand
+        private byte[] compressed;
+
+        /** default decompressor */
+        public LZ4InnerDecompressor() {
+            compressed = BytesRef.EMPTY_BYTES;
+            decompressor = LZ4Factory.nativeInstance().fastDecompressor();
+        }
+
+        /**
+         * Restores the slice {@code [offset, offset + length)} of a chunk written by
+         * {@link LZ4InnerCompressor#compress}.
+         *
+         * <p>Sub-blocks that end before {@code offset} are skipped without being decompressed; the
+         * remaining sub-blocks up to {@code offset + length} are decompressed directly into
+         * {@code bytes.bytes}, and {@code bytes.offset}/{@code bytes.length} are set to the slice.
+         *
+         * @param in             input positioned at the block length header
+         * @param originalLength uncompressed length of the whole chunk
+         * @param offset         start of the requested slice within the uncompressed chunk
+         * @param length         length of the requested slice
+         * @param bytes          receives the decompressed slice
+         * @throws IOException if reading from {@code in} fails
+         */
+        @Override
+        public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException {
+            assert offset + length <= originalLength;
+
+            if (length == 0) {
+                bytes.length = 0;
+                return;
+            }
+
+            final int blockLength = in.readVInt();
+            bytes.offset = bytes.length = 0;
+            int offsetInBlock = 0;
+            int offsetInBytesRef = offset;
+
+            // Skip unneeded blocks
+            while (offsetInBlock + blockLength < offset) {
+                final int compressedLength = in.readVInt();
+                in.skipBytes(compressedLength);
+                offsetInBlock += blockLength;
+                offsetInBytesRef -= blockLength;
+            }
+
+            // Read blocks that intersect with the interval we need
+            while (offsetInBlock < offset + length) {
+                final int compressedLength = in.readVInt();
+                if (compressedLength == 0) {
+                    return;
+                }
+                compressed = ArrayUtil.grow(compressed, compressedLength);
+                in.readBytes(compressed, 0, compressedLength);
+
+                // the last sub-block may be shorter than blockLength
+                final int l = Math.min(blockLength, originalLength - offsetInBlock);
+                bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + l);
+
+                // decompress straight into the destination: no per-block temporary array or copy
+                decompressor.decompress(compressed, 0, bytes.bytes, bytes.length, l);
+
+                bytes.length += l;
+                offsetInBlock += blockLength;
+            }
+
+            bytes.offset = offsetInBytesRef;
+            bytes.length = length;
+            assert bytes.isValid();
+        }
+
+        @Override
+        public Decompressor clone() {
+            return new LZ4InnerDecompressor();
+        }
+    }
+}
diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java
new file mode 100644
index 0000000000000..dcfa9c5c83e48
--- /dev/null
+++ b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java
@@ -0,0 +1,115 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors.
See
+ * GitHub history for details.
+ */
+
+package org.apache.lucene.codecs.experimental;
+
+import java.io.IOException;
+import java.util.Objects;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.StoredFieldsReader;
+import org.apache.lucene.codecs.StoredFieldsWriter;
+import org.apache.lucene.codecs.compressing.CompressionMode;
+import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+
+/**
+ * Stored field format used by pluggable codec.
+ *
+ * <p>The compression mode used for writing is recorded in the segment attributes under
+ * {@link #MODE_KEY} and read back from there when a reader is opened.
+ */
+public class Lucene90CustomStoredFieldsFormat extends StoredFieldsFormat {
+    private static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024;
+    private static final int LZ4_NATIVE_BLOCK_LENGTH = 10 * 8 * 1024;
+
+    public static final String MODE_KEY = Lucene90CustomStoredFieldsFormat.class.getSimpleName() + ".mode";
+
+    final Lucene92CustomCodec.Mode mode;
+
+    private final int compressionLevel;
+
+    // Assigned in the constructor, NOT in field initializers: initializers run before the
+    // constructor body and would therefore always observe compressionLevel == 0.
+    public final CompressionMode ZSTD_MODE;
+    public final CompressionMode ZSTD_MODE_NO_DICT;
+    public final CompressionMode LZ4_MODE;
+
+    /** default constructor */
+    public Lucene90CustomStoredFieldsFormat() {
+        this(Lucene92CustomCodec.Mode.LZ4_NATIVE, Lucene92CustomCodec.defaultCompressionLevel);
+    }
+
+    /**
+     * Stored fields format with specified compression algo.
+     *
+     * @param mode             compression algorithm used when writing; must not be {@code null}
+     * @param compressionLevel level handed to the zstd compression modes
+     * @throws NullPointerException if {@code mode} is {@code null}
+     */
+    public Lucene90CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode, int compressionLevel) {
+        this.mode = Objects.requireNonNull(mode);
+        this.compressionLevel = compressionLevel;
+        this.ZSTD_MODE = new ZstdCompressionMode(compressionLevel);
+        this.ZSTD_MODE_NO_DICT = new ZstdNoDictCompressionMode(compressionLevel);
+        this.LZ4_MODE = new LZ4CompressionMode();
+    }
+
+    /**
+     * Opens a reader using the compression mode recorded in the segment attributes.
+     *
+     * @throws IllegalStateException if the segment carries no {@link #MODE_KEY} attribute
+     */
+    @Override
+    public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
+        String value = si.getAttribute(MODE_KEY);
+        if (value == null) {
+            throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name);
+        }
+        Lucene92CustomCodec.Mode mode = Lucene92CustomCodec.Mode.valueOf(value);
+        return impl(mode).fieldsReader(directory, si, fn, context);
+    }
+
+    /**
+     * Records this format's mode in the segment attributes and opens a writer for it.
+     *
+     * @throws IllegalStateException if the segment already carries a different mode
+     */
+    @Override
+    public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
+        String previous = si.putAttribute(MODE_KEY, mode.name());
+        if (previous != null && previous.equals(mode.name()) == false) {
+            throw new IllegalStateException(
+                "found existing value for " + MODE_KEY + " for segment: " + si.name + ", old=" + previous + ", new=" + mode.name()
+            );
+        }
+        return impl(mode).fieldsWriter(directory, si, context);
+    }
+
+    /** Builds the delegate stored fields format for the given compression mode. */
+    StoredFieldsFormat impl(Lucene92CustomCodec.Mode mode) {
+        switch (mode) {
+            case ZSTD:
+                return new Lucene90CompressingStoredFieldsFormat("CustomStoredFieldsZstd", ZSTD_MODE, ZSTD_BLOCK_LENGTH, 4096, 10);
+
+            case ZSTD_NO_DICT:
+                return new Lucene90CompressingStoredFieldsFormat(
+                    "CustomStoredFieldsZstdNoDict",
+                    ZSTD_MODE_NO_DICT,
+                    ZSTD_BLOCK_LENGTH,
+                    4096,
+                    10
+                );
+
+            case LZ4_NATIVE:
+                return new Lucene90CompressingStoredFieldsFormat("CustomStoredFieldsLz4", LZ4_MODE, LZ4_NATIVE_BLOCK_LENGTH, 1024, 10);
+
+            default:
+                throw new AssertionError();
+        }
+    }
+}
diff --git
a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java new file mode 100644 index 0000000000000..7aeb46e0d0b06 --- /dev/null +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java @@ -0,0 +1,105 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */
+
+package org.apache.lucene.codecs.experimental;
+
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene92.Lucene92Codec;
+
+/**
+ * Codec that delegates everything to {@link Lucene92Codec} except stored fields, which are
+ * written with the selected custom compression algorithm.
+ */
+public class Lucene92CustomCodec extends FilterCodec {
+
+    public static final int defaultCompressionLevel = 6;
+    private final StoredFieldsFormat storedFieldsFormat;
+    private int compressionLevel;
+
+    /** Supported stored-fields compression algorithms. */
+    public static enum Mode {
+        /** Zstandard with dictionary */
+        ZSTD,
+        /** Zstandard without dictionary */
+        ZSTD_NO_DICT,
+        /** lz4 native */
+        LZ4_NATIVE
+    }
+
+    /** Creates a codec using native LZ4 and the default compression level. */
+    public Lucene92CustomCodec() {
+        this(Mode.LZ4_NATIVE, defaultCompressionLevel);
+    }
+
+    /** Creates a codec for the given algorithm using the default compression level. */
+    public Lucene92CustomCodec(Mode compressionMode) {
+        this(compressionMode, defaultCompressionLevel);
+    }
+
+    /**
+     * Creates a codec for the given algorithm and compression level.
+     *
+     * @throws IllegalArgumentException if a zstd mode is requested with a level outside 1..22
+     */
+    public Lucene92CustomCodec(Mode compressionMode, int compressionLevel) {
+        super("Lucene92CustomCodec", new Lucene92Codec());
+        this.compressionLevel = compressionLevel;
+
+        switch (compressionMode) {
+            case ZSTD:
+            case ZSTD_NO_DICT:
+                // both zstd flavours share the same level range
+                final boolean levelInRange = this.compressionLevel >= 1 && this.compressionLevel <= 22;
+                if (levelInRange == false) {
+                    throw new IllegalArgumentException("Invalid compression level");
+                }
+                break;
+
+            case LZ4_NATIVE:
+                // the level is not validated for lz4
+                break;
+
+            default:
+                throw new IllegalArgumentException("Chosen compression mode does not exist");
+        }
+
+        this.storedFieldsFormat = new Lucene90CustomStoredFieldsFormat(compressionMode, compressionLevel);
+    }
+
+    @Override
+    public StoredFieldsFormat storedFieldsFormat() {
+        return this.storedFieldsFormat;
+    }
+
+    @Override
+    public String toString() {
+        final Class<?> self = getClass();
+        return self.getSimpleName();
+    }
+}
diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java b/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java
new file mode 100644
index 0000000000000..a2c273b0c666f
--- /dev/null
+++ b/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java
@@ -0,0 +1,51 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.apache.lucene.codecs.experimental;
+
+import org.apache.lucene.codecs.Codec;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.common.lucene.Lucene;
+
+/**
+ * {@link Lucene92CustomCodec} variant that is constructed with the index's {@link MapperService}.
+ */
+public class PerFieldMappingPostingFormatCustomCodec extends Lucene92CustomCodec {
+    private final MapperService mapperService;
+
+    static {
+        // This class extends Lucene92CustomCodec, which is a FilterCodec and therefore can never be a
+        // subclass of the latest Lucene codec class; asserting that would fail at class initialization
+        // whenever assertions are enabled. What must track the latest codec is the delegate wrapped by
+        // Lucene92CustomCodec, so that is what gets checked here.
+        assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(org.apache.lucene.codecs.lucene92.Lucene92Codec.class)
+            : "PerFieldMappingPostingFormatCustomCodec must wrap the latest " + "lucene codec: " + Lucene.LATEST_CODEC;
+    }
+
+    /**
+     * Creates the codec for the given compression algorithm with the default compression level.
+     *
+     * @param compressionMode stored-fields compression algorithm
+     * @param mapperService   mapper service of the index this codec is created for
+     */
+    public PerFieldMappingPostingFormatCustomCodec(Lucene92CustomCodec.Mode compressionMode, MapperService mapperService) {
+        super(compressionMode);
+        this.mapperService = mapperService;
+    }
+}
diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java
new file mode 100644
index 0000000000000..a861ff22690e4
--- /dev/null
+++ b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java
@@ -0,0 +1,205 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.apache.lucene.codecs.experimental;
+
+import com.github.luben.zstd.*;
+import java.io.IOException;
+import org.apache.lucene.codecs.compressing.CompressionMode;
+import org.apache.lucene.codecs.compressing.Compressor;
+import org.apache.lucene.codecs.compressing.Decompressor;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Zstandard compression mode with a per-chunk dictionary.
+ *
+ * <p>The first {@code len / (NUM_SUB_BLOCKS * DICT_SIZE_FACTOR)} bytes of every chunk are
+ * compressed on their own and then loaded as dictionary for the remaining sub-blocks. The stream
+ * layout is: {@code vInt(dictLength)}, {@code vInt(blockLength)}, the compressed dictionary, then
+ * the compressed sub-blocks, each prefixed with its compressed size as a vInt.
+ */
+public class ZstdCompressionMode extends CompressionMode {
+    public static final int defaultLevel = 6;
+    private static final int NUM_SUB_BLOCKS = 10;
+    private static final int DICT_SIZE_FACTOR = 6;
+    private final int level;
+
+    /** default constructor */
+    ZstdCompressionMode() {
+        this.level = defaultLevel;
+    }
+
+    /** compression mode for a given compression level */
+    ZstdCompressionMode(int level) {
+        this.level = level;
+    }
+
+    @Override
+    public Compressor newCompressor() {
+        return new ZSTDCompressor(level);
+    }
+
+    @Override
+    public Decompressor newDecompressor() {
+        return new ZSTDDecompressor();
+    }
+
+    /** zstandard compressor */
+    private static final class ZSTDCompressor extends Compressor {
+
+        private final int compressionLevel;
+        // scratch buffer reused across calls; grown on demand
+        private byte[] compressedBuffer;
+
+        /** compressor with a given compression level */
+        public ZSTDCompressor(int compressionLevel) {
+            this.compressionLevel = compressionLevel;
+            compressedBuffer = BytesRef.EMPTY_BYTES;
+        }
+
+        @Override
+        public void close() throws IOException {}
+
+        /**
+         * Reusable compress function: compresses one block with {@code cctx} and writes
+         * {@code vInt(compressedSize)} followed by the compressed bytes. An empty block is
+         * written as a single {@code vInt(0)}.
+         */
+        private void doCompress(byte[] bytes, int off, int len, ZstdCompressCtx cctx, DataOutput out) throws IOException {
+            if (len == 0) {
+                out.writeVInt(0);
+                return;
+            }
+            final int maxCompressedLength = (int) Zstd.compressBound(len);
+            compressedBuffer = ArrayUtil.grow(compressedBuffer, maxCompressedLength);
+
+            int compressedSize = cctx.compressByteArray(compressedBuffer, 0, compressedBuffer.length, bytes, off, len);
+
+            out.writeVInt(compressedSize);
+            out.writeBytes(compressedBuffer, compressedSize);
+        }
+
+        /**
+         * Compresses {@code len} bytes of {@code bytes} starting at {@code off}.
+         *
+         * @param bytes source buffer
+         * @param off   offset of the first byte to compress
+         * @param len   number of bytes to compress
+         * @param out   destination of the headers, the dictionary and the sub-blocks
+         * @throws IOException if writing to {@code out} fails
+         */
+        @Override
+        public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
+            final int dictLength = len / (NUM_SUB_BLOCKS * DICT_SIZE_FACTOR);
+            final int blockLength = (len - dictLength + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS;
+            out.writeVInt(dictLength);
+            out.writeVInt(blockLength);
+
+            final int end = off + len;
+
+            try (ZstdCompressCtx cctx = new ZstdCompressCtx()) {
+                cctx.setLevel(this.compressionLevel);
+
+                // dictionary compression first
+                doCompress(bytes, off, dictLength, cctx, out);
+
+                // the dictionary owns native memory: release it deterministically instead of
+                // leaving it to the garbage collector
+                try (ZstdDictCompress dict = new ZstdDictCompress(bytes, off, dictLength, this.compressionLevel)) {
+                    cctx.loadDict(dict);
+
+                    for (int start = off + dictLength; start < end; start += blockLength) {
+                        int l = Math.min(blockLength, off + len - start);
+                        doCompress(bytes, start, l, cctx, out);
+                    }
+                }
+            }
+        }
+    }
+
+    /** zstandard decompressor */
+    private static final class ZSTDDecompressor extends Decompressor {
+
+        // scratch buffer holding one compressed block; grown on demand
+        private byte[] compressed;
+
+        /** default decompressor */
+        public ZSTDDecompressor() {
+            compressed = BytesRef.EMPTY_BYTES;
+        }
+
+        /**
+         * Reusable decompress function: reads one block written by {@code doCompress} and appends
+         * its {@code decompressedLen} bytes to {@code bytes}. A block whose compressed size is 0
+         * appends nothing.
+         *
+         * @throws IllegalStateException if the block does not decompress to {@code decompressedLen} bytes
+         */
+        private void doDecompress(DataInput in, ZstdDecompressCtx dctx, BytesRef bytes, int decompressedLen) throws IOException {
+            final int compressedLength = in.readVInt();
+            if (compressedLength == 0) {
+                return;
+            }
+
+            compressed = ArrayUtil.grow(compressed, compressedLength);
+            in.readBytes(compressed, 0, compressedLength);
+
+            bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + decompressedLen);
+            int uncompressed = dctx.decompressByteArray(bytes.bytes, bytes.length, decompressedLen, compressed, 0, compressedLength);
+
+            if (decompressedLen != uncompressed) {
+                throw new IllegalStateException(decompressedLen + " " + uncompressed);
+            }
+            bytes.length += uncompressed;
+        }
+
+        /**
+         * Restores the slice {@code [offset, offset + length)} of a chunk written by
+         * {@link ZSTDCompressor#compress}. The dictionary is always decompressed first and stays at
+         * the start of {@code bytes.bytes}; sub-blocks ending before {@code offset} are skipped.
+         *
+         * @param in             input positioned at the dictionary length header
+         * @param originalLength uncompressed length of the whole chunk
+         * @param offset         start of the requested slice within the uncompressed chunk
+         * @param length         length of the requested slice
+         * @param bytes          receives the decompressed slice
+         * @throws IOException if reading from {@code in} fails
+         */
+        @Override
+        public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException {
+            assert offset + length <= originalLength;
+
+            if (length == 0) {
+                bytes.length = 0;
+                return;
+            }
+            final int dictLength = in.readVInt();
+            final int blockLength = in.readVInt();
+            bytes.bytes = ArrayUtil.grow(bytes.bytes, dictLength);
+            bytes.offset = bytes.length = 0;
+
+            try (ZstdDecompressCtx dctx = new ZstdDecompressCtx()) {
+
+                // decompress dictionary first
+                doDecompress(in, dctx, bytes, dictLength);
+
+                // the dictionary owns native memory: release it deterministically instead of
+                // leaving it to the garbage collector
+                try (ZstdDictDecompress dict = new ZstdDictDecompress(bytes.bytes, 0, dictLength)) {
+                    dctx.loadDict(dict);
+
+                    int offsetInBlock = dictLength;
+                    int offsetInBytesRef = offset;
+
+                    // Skip unneeded blocks
+                    while (offsetInBlock + blockLength < offset) {
+                        final int compressedLength = in.readVInt();
+                        in.skipBytes(compressedLength);
+                        offsetInBlock += blockLength;
+                        offsetInBytesRef -= blockLength;
+                    }
+
+                    // Read blocks that intersect with the interval we need
+                    while (offsetInBlock < offset + length) {
+                        bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + blockLength);
+                        int l = Math.min(blockLength, originalLength - offsetInBlock);
+                        doDecompress(in, dctx, bytes, l);
+                        offsetInBlock += blockLength;
+                    }
+
+                    bytes.offset = offsetInBytesRef;
+                    bytes.length = length;
+                    assert bytes.isValid();
+                }
+            }
+        }
+
+        @Override
+        public Decompressor clone() {
+            return new ZSTDDecompressor();
+        }
+    }
+}
diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java
new file mode 100644
index
0000000000000..f5800775beb4b --- /dev/null +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java @@ -0,0 +1,185 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */
+
+package org.apache.lucene.codecs.experimental;
+
+import com.github.luben.zstd.Zstd;
+import java.io.IOException;
+import org.apache.lucene.codecs.compressing.CompressionMode;
+import org.apache.lucene.codecs.compressing.Compressor;
+import org.apache.lucene.codecs.compressing.Decompressor;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Zstandard compression mode without a dictionary.
+ *
+ * <p>Each chunk is split into at most {@code NUM_SUB_BLOCKS} sub-blocks that are compressed
+ * independently. The stream layout is: {@code vInt(blockLength)} followed, for every sub-block,
+ * by {@code vInt(compressedSize)} and the compressed bytes.
+ */
+public class ZstdNoDictCompressionMode extends CompressionMode {
+    private static final int NUM_SUB_BLOCKS = 10;
+    private final int level;
+    public static final int defaultLevel = 6;
+
+    /** default constructor */
+    ZstdNoDictCompressionMode() {
+        this.level = defaultLevel;
+    }
+
+    /** compression mode for a given compression level */
+    ZstdNoDictCompressionMode(int level) {
+        this.level = level;
+    }
+
+    @Override
+    public Compressor newCompressor() {
+        return new ZSTDCompressor(level);
+    }
+
+    @Override
+    public Decompressor newDecompressor() {
+        return new ZSTDDecompressor();
+    }
+
+    /** zstandard compressor */
+    private static final class ZSTDCompressor extends Compressor {
+
+        private final int compressionLevel;
+        // scratch buffer reused across calls; grown on demand
+        private byte[] compressedBuffer;
+
+        /** compressor with a given compression level */
+        public ZSTDCompressor(int compressionLevel) {
+            this.compressionLevel = compressionLevel;
+            compressedBuffer = BytesRef.EMPTY_BYTES;
+        }
+
+        @Override
+        public void close() throws IOException {}
+
+        /**
+         * Compresses {@code len} bytes of {@code bytes} starting at {@code off} as a sequence of
+         * independently compressed sub-blocks.
+         *
+         * @param bytes source buffer
+         * @param off   offset of the first byte to compress
+         * @param len   number of bytes to compress
+         * @param out   destination of the block length header and the compressed sub-blocks
+         * @throws IOException if zstd reports an error or writing to {@code out} fails
+         */
+        @Override
+        public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
+
+            // ceil(len / NUM_SUB_BLOCKS); 0 only when len == 0, in which case the loop does not run
+            final int blockLength = (len + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS;
+            out.writeVInt(blockLength);
+
+            final int end = off + len;
+
+            for (int start = off; start < end; start += blockLength) {
+                final int l = Math.min(blockLength, end - start);
+
+                // defensive only: l is always >= 1 while start < end
+                if (l == 0) {
+                    out.writeVInt(0);
+                    return;
+                }
+
+                final int maxCompressedLength = (int) Zstd.compressBound(l);
+                compressedBuffer = ArrayUtil.grow(compressedBuffer, maxCompressedLength);
+
+                final long result = Zstd.compressByteArray(
+                    compressedBuffer,
+                    0,
+                    compressedBuffer.length,
+                    bytes,
+                    start,
+                    l,
+                    this.compressionLevel
+                );
+                // the static API signals failure through the return value; never write an error
+                // code to the stream as if it were a length
+                if (Zstd.isError(result)) {
+                    throw new IOException("zstd compression failed: " + Zstd.getErrorName(result));
+                }
+                final int compressedSize = (int) result;
+
+                out.writeVInt(compressedSize);
+                out.writeBytes(compressedBuffer, compressedSize);
+            }
+        }
+    }
+
+    /** zstandard decompressor */
+    private static final class ZSTDDecompressor extends Decompressor {
+
+        // scratch buffer holding one compressed sub-block; grown on demand
+        private byte[] compressed;
+
+        /** default decompressor */
+        public ZSTDDecompressor() {
+            compressed = BytesRef.EMPTY_BYTES;
+        }
+
+        /**
+         * Restores the slice {@code [offset, offset + length)} of a chunk written by
+         * {@link ZSTDCompressor#compress}. Sub-blocks ending before {@code offset} are skipped; the
+         * others are decompressed directly into {@code bytes.bytes}.
+         *
+         * @param in             input positioned at the block length header
+         * @param originalLength uncompressed length of the whole chunk
+         * @param offset         start of the requested slice within the uncompressed chunk
+         * @param length         length of the requested slice
+         * @param bytes          receives the decompressed slice
+         * @throws IOException if zstd reports an error or reading from {@code in} fails
+         */
+        @Override
+        public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException {
+            assert offset + length <= originalLength;
+
+            if (length == 0) {
+                bytes.length = 0;
+                return;
+            }
+
+            final int blockLength = in.readVInt();
+            bytes.offset = bytes.length = 0;
+            int offsetInBlock = 0;
+            int offsetInBytesRef = offset;
+
+            // Skip unneeded blocks
+            while (offsetInBlock + blockLength < offset) {
+                final int compressedLength = in.readVInt();
+                in.skipBytes(compressedLength);
+                offsetInBlock += blockLength;
+                offsetInBytesRef -= blockLength;
+            }
+
+            // Read blocks that intersect with the interval we need
+            while (offsetInBlock < offset + length) {
+                final int compressedLength = in.readVInt();
+                if (compressedLength == 0) {
+                    return;
+                }
+                compressed = ArrayUtil.grow(compressed, compressedLength);
+                in.readBytes(compressed, 0, compressedLength);
+
+                // the last sub-block may be shorter than blockLength
+                final int l = Math.min(blockLength, originalLength - offsetInBlock);
+                bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + l);
+
+                // decompress straight into the destination: no per-block temporary array or copy
+                final long result = Zstd.decompressByteArray(bytes.bytes, bytes.length, l, compressed, 0, compressedLength);
+                if (Zstd.isError(result)) {
+                    throw new IOException("zstd decompression failed: " + Zstd.getErrorName(result));
+                }
+
+                bytes.length += (int) result;
+                offsetInBlock += blockLength;
+            }
+
+            bytes.offset = offsetInBytesRef;
+            bytes.length = length;
+            assert bytes.isValid();
+        }
+
+        @Override
+        public Decompressor clone() {
+            return new ZSTDDecompressor();
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/codec/CodecService.java b/server/src/main/java/org/opensearch/index/codec/CodecService.java
index ff254a63fadb6..c16deb31703c6 100644
--- a/server/src/main/java/org/opensearch/index/codec/CodecService.java
+++ b/server/src/main/java/org/opensearch/index/codec/CodecService.java
@@ -36,6 +36,8 @@
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.lucene92.Lucene92Codec;
 import org.apache.lucene.codecs.lucene92.Lucene92Codec.Mode;
+import org.apache.lucene.codecs.experimental.Lucene92CustomCodec;
+import org.apache.lucene.codecs.experimental.PerFieldMappingPostingFormatCustomCodec;
 import org.opensearch.common.Nullable;
 import org.opensearch.common.collect.MapBuilder;
 import org.opensearch.index.mapper.MapperService;
@@ -58,15 +60,28 @@ public class CodecService {
     public static final String BEST_COMPRESSION_CODEC = "best_compression";
     /** the raw unfiltered lucene default.
useful for testing */ public static final String LUCENE_DEFAULT_CODEC = "lucene_default"; + /** zstd (with and without dictionary) and lz4 (native) compression */ + public static final String ZSTD_CODEC = "zstd"; + public static final String ZSTD_NO_DICT_CODEC = "zstd_no_dict"; + public static final String LZ4_NATIVE_CODEC = "lz4_native"; public CodecService(@Nullable MapperService mapperService, Logger logger) { final MapBuilder codecs = MapBuilder.newMapBuilder(); if (mapperService == null) { codecs.put(DEFAULT_CODEC, new Lucene92Codec()); codecs.put(BEST_COMPRESSION_CODEC, new Lucene92Codec(Mode.BEST_COMPRESSION)); + codecs.put(ZSTD_CODEC, new Lucene92CustomCodec(Lucene92CustomCodec.Mode.ZSTD)); + codecs.put(ZSTD_NO_DICT_CODEC, new Lucene92CustomCodec(Lucene92CustomCodec.Mode.ZSTD_NO_DICT)); + codecs.put(LZ4_NATIVE_CODEC, new Lucene92CustomCodec(Lucene92CustomCodec.Mode.LZ4_NATIVE)); } else { codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); + codecs.put(ZSTD_CODEC, new PerFieldMappingPostingFormatCustomCodec(Lucene92CustomCodec.Mode.ZSTD, mapperService)); + codecs.put( + ZSTD_NO_DICT_CODEC, + new PerFieldMappingPostingFormatCustomCodec(Lucene92CustomCodec.Mode.ZSTD_NO_DICT, mapperService) + ); + codecs.put(LZ4_NATIVE_CODEC, new PerFieldMappingPostingFormatCustomCodec(Lucene92CustomCodec.Mode.LZ4_NATIVE, mapperService)); } codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); for (String codec : Codec.availableCodecs()) { diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index 4ae6646ed14f0..dea9175c886df 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -123,6 +123,9 @@ public Supplier 
retentionLeasesSupplier() { case "default": case "best_compression": case "lucene_default": + case "zstd": + case "zstd_no_dict": + case "lz4_native": return s; default: if (Codec.availableCodecs().contains(s) == false) { // we don't error message the not officially supported ones diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec new file mode 100644 index 0000000000000..cda4862523bd3 --- /dev/null +++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -0,0 +1 @@ +org.apache.lucene.codecs.experimental.Lucene92CustomCodec From 8aac766089b7d079c78970a8f3c00c5e1cb19fbb Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Tue, 14 Jun 2022 16:28:42 +0000 Subject: [PATCH 02/25] Fix license issues, add tests for zstd, zstd_no_dict, and lz4_native compressions. Signed-off-by: Mulugeta Mammo --- buildSrc/version.properties | 2 +- modules/transport-netty4/build.gradle | 6 - plugins/transport-nio/build.gradle | 6 - server/licenses/lz4-java-1.8.0.jar.sha1 | 1 + server/licenses/lz4-java-LICENSE.txt | 202 ++++++++++++++++++ server/licenses/lz4-java-NOTICE.txt | 3 + server/licenses/zstd-jni-1.5.2-3.jar.sha1 | 1 + server/licenses/zstd-jni-LICENSE.txt | 29 +++ server/licenses/zstd-jni-NOTICE.txt | 1 + ...rFieldMappingPostingFormatCustomCodec.java | 7 - .../org/opensearch/bootstrap/security.policy | 2 + .../opensearch/index/codec/CodecTests.java | 33 +++ 12 files changed, 273 insertions(+), 20 deletions(-) create mode 100644 server/licenses/lz4-java-1.8.0.jar.sha1 create mode 100644 server/licenses/lz4-java-LICENSE.txt create mode 100644 server/licenses/lz4-java-NOTICE.txt create mode 100644 server/licenses/zstd-jni-1.5.2-3.jar.sha1 create mode 100644 server/licenses/zstd-jni-LICENSE.txt create mode 100644 server/licenses/zstd-jni-NOTICE.txt diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 
e16af92a03844..81bacf6702d7c 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -18,7 +18,7 @@ log4j = 2.17.1 slf4j = 1.6.2 asm = 9.3 lz4 = 1.8.0 -zstd = 1.5.0-4 +zstd = 1.5.2-3 # when updating the JNA version, also update the version in buildSrc/build.gradle jna = 5.5.0 diff --git a/modules/transport-netty4/build.gradle b/modules/transport-netty4/build.gradle index 74448e7a5ac06..a9517b2b3d2e5 100644 --- a/modules/transport-netty4/build.gradle +++ b/modules/transport-netty4/build.gradle @@ -182,7 +182,6 @@ thirdPartyAudit { 'org.slf4j.LoggerFactory', 'org.slf4j.spi.LocationAwareLogger', - 'com.github.luben.zstd.Zstd', 'com.google.protobuf.ExtensionRegistryLite', 'com.google.protobuf.MessageLiteOrBuilder', 'com.google.protobuf.nano.CodedOutputByteBufferNano', @@ -199,11 +198,6 @@ thirdPartyAudit { 'com.ning.compress.lzf.util.ChunkDecoderFactory', 'com.ning.compress.lzf.util.ChunkEncoderFactory', 'lzma.sdk.lzma.Encoder', - 'net.jpountz.lz4.LZ4Compressor', - 'net.jpountz.lz4.LZ4Factory', - 'net.jpountz.lz4.LZ4FastDecompressor', - 'net.jpountz.xxhash.XXHash32', - 'net.jpountz.xxhash.XXHashFactory', 'io.netty.internal.tcnative.AsyncSSLPrivateKeyMethod', 'io.netty.internal.tcnative.AsyncTask', 'io.netty.internal.tcnative.CertificateCallback', diff --git a/plugins/transport-nio/build.gradle b/plugins/transport-nio/build.gradle index a7e8c42a4e2d3..5e2168bed6254 100644 --- a/plugins/transport-nio/build.gradle +++ b/plugins/transport-nio/build.gradle @@ -110,7 +110,6 @@ thirdPartyAudit { 'org.slf4j.LoggerFactory', 'org.slf4j.spi.LocationAwareLogger', - 'com.github.luben.zstd.Zstd', 'com.google.protobuf.ExtensionRegistryLite', 'com.google.protobuf.MessageLiteOrBuilder', 'com.google.protobuf.nano.CodedOutputByteBufferNano', @@ -127,11 +126,6 @@ thirdPartyAudit { 'com.ning.compress.lzf.util.ChunkDecoderFactory', 'com.ning.compress.lzf.util.ChunkEncoderFactory', 'lzma.sdk.lzma.Encoder', - 'net.jpountz.lz4.LZ4Compressor', - 
'net.jpountz.lz4.LZ4Factory', - 'net.jpountz.lz4.LZ4FastDecompressor', - 'net.jpountz.xxhash.XXHash32', - 'net.jpountz.xxhash.XXHashFactory', 'org.eclipse.jetty.alpn.ALPN$ClientProvider', 'org.eclipse.jetty.alpn.ALPN$ServerProvider', 'org.eclipse.jetty.alpn.ALPN', diff --git a/server/licenses/lz4-java-1.8.0.jar.sha1 b/server/licenses/lz4-java-1.8.0.jar.sha1 new file mode 100644 index 0000000000000..1aac31a3d4cb0 --- /dev/null +++ b/server/licenses/lz4-java-1.8.0.jar.sha1 @@ -0,0 +1 @@ +4b986a99445e49ea5fbf5d149c4b63f6ed6c6780 diff --git a/server/licenses/lz4-java-LICENSE.txt b/server/licenses/lz4-java-LICENSE.txt new file mode 100644 index 0000000000000..d645695673349 --- /dev/null +++ b/server/licenses/lz4-java-LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/server/licenses/lz4-java-NOTICE.txt b/server/licenses/lz4-java-NOTICE.txt new file mode 100644 index 0000000000000..a9f0e2b890655 --- /dev/null +++ b/server/licenses/lz4-java-NOTICE.txt @@ -0,0 +1,3 @@ +LZ4 compression for Java, based on Yann Collet's work available at http://code.google.com/p/lz4/. + + diff --git a/server/licenses/zstd-jni-1.5.2-3.jar.sha1 b/server/licenses/zstd-jni-1.5.2-3.jar.sha1 new file mode 100644 index 0000000000000..a57e9470a9166 --- /dev/null +++ b/server/licenses/zstd-jni-1.5.2-3.jar.sha1 @@ -0,0 +1 @@ +f52de0603f31798455e48bd90e10a8f888dd6d93 \ No newline at end of file diff --git a/server/licenses/zstd-jni-LICENSE.txt b/server/licenses/zstd-jni-LICENSE.txt new file mode 100644 index 0000000000000..c4dd507c1c72f --- /dev/null +++ b/server/licenses/zstd-jni-LICENSE.txt @@ -0,0 +1,29 @@ +----------------------------------------------------------------------------- +** Beginning of "BSD License" text. ** + +Zstd-jni: JNI bindings to Zstd Library + +Copyright (c) 2015-present, Luben Karavelov/ All rights reserved. + +BSD License + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/server/licenses/zstd-jni-NOTICE.txt b/server/licenses/zstd-jni-NOTICE.txt new file mode 100644 index 0000000000000..389c97cbc892d --- /dev/null +++ b/server/licenses/zstd-jni-NOTICE.txt @@ -0,0 +1 @@ +The code for the JNI bindings to Zstd library was originally authored by Luben Karavelov diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java b/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java index a2c273b0c666f..a396cba76f643 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java @@ -32,18 +32,11 @@ package org.apache.lucene.codecs.experimental; -import org.apache.lucene.codecs.Codec; import org.opensearch.index.mapper.MapperService; -import org.opensearch.common.lucene.Lucene; public class 
PerFieldMappingPostingFormatCustomCodec extends Lucene92CustomCodec { private final MapperService mapperService; - static { - assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMappingPostingFormatCustomCodec.class) - : "PerFieldMappingPostingFormatCustomCodec must subclass the latest " + "lucene codec: " + Lucene.LATEST_CODEC; - } - public PerFieldMappingPostingFormatCustomCodec(Lucene92CustomCodec.Mode compressionMode, MapperService mapperService) { super(compressionMode); this.mapperService = mapperService; diff --git a/server/src/main/resources/org/opensearch/bootstrap/security.policy b/server/src/main/resources/org/opensearch/bootstrap/security.policy index 3671782b9d12f..1197c1dbd94da 100644 --- a/server/src/main/resources/org/opensearch/bootstrap/security.policy +++ b/server/src/main/resources/org/opensearch/bootstrap/security.policy @@ -182,4 +182,6 @@ grant { permission java.io.FilePermission "/sys/fs/cgroup/memory", "read"; permission java.io.FilePermission "/sys/fs/cgroup/memory/-", "read"; + // ZSTD and lz4-java permissions + permission java.lang.RuntimePermission "*"; }; diff --git a/server/src/test/java/org/opensearch/index/codec/CodecTests.java b/server/src/test/java/org/opensearch/index/codec/CodecTests.java index 0275066f9af1b..0669e5b553a13 100644 --- a/server/src/test/java/org/opensearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/opensearch/index/codec/CodecTests.java @@ -53,6 +53,7 @@ import org.opensearch.plugins.MapperPlugin; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.IndexSettingsModule; +import org.apache.lucene.codec.experimental; import java.io.IOException; import java.util.Collections; @@ -78,6 +79,21 @@ public void testBestCompression() throws Exception { assertStoredFieldsCompressionEquals(Lucene92Codec.Mode.BEST_COMPRESSION, codec); } + public void testZstdCompression() throws Exception { + Codec codec = createCodecService().codec("zstd"); + 
assertStoredFieldsCompressionEquals(Lucene92CustomCodec.Mode.ZSTD, codec); + } + + public void testZstdNoDictCompression() throws Exception { + Codec codec = createCodecService().codec("zstd_no_dict"); + assertStoredFieldsCompressionEquals(Lucene92CustomCodec.Mode.ZSTD_NO_DICT, codec); + } + + public void testLz4NativeCompression() throws Exception { + Codec codec = createCodecService().codec("lz4_native"); + assertStoredFieldsCompressionEquals(Lucene92CustomCodec.Mode.LZ4_NATIVE, codec); + } + // write some docs with it, inspect .si to see this was the used compression private void assertStoredFieldsCompressionEquals(Lucene92Codec.Mode expected, Codec actual) throws Exception { Directory dir = newDirectory(); @@ -96,6 +112,23 @@ private void assertStoredFieldsCompressionEquals(Lucene92Codec.Mode expected, Co dir.close(); } + private void assertStoredFieldsCompressionEquals(Lucene92CustomCodec.Mode expected, Codec actual) throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(null); + iwc.setCodec(actual); + IndexWriter iw = new IndexWriter(dir, iwc); + iw.addDocument(new Document()); + iw.commit(); + iw.close(); + DirectoryReader ir = DirectoryReader.open(dir); + SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader(); + String v = sr.getSegmentInfo().info.getAttribute(Lucene90CustomStoredFieldsFormat.MODE_KEY); + assertNotNull(v); + assertEquals(expected, Lucene92CustomCodec.Mode.valueOf(v)); + ir.close(); + dir.close(); + } + private CodecService createCodecService() throws IOException { Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); IndexSettings settings = IndexSettingsModule.newIndexSettings("_na", nodeSettings); From 33715b24636a46ba9799408e395d3b8ec0090240 Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Tue, 14 Jun 2022 19:25:13 +0000 Subject: [PATCH 03/25] Fix DCO and issues with CodecTests.java. 
Signed-off-by: Mulugeta Mammo --- .../java/org/opensearch/index/codec/CodecTests.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/codec/CodecTests.java b/server/src/test/java/org/opensearch/index/codec/CodecTests.java index 0669e5b553a13..9e7f59bbe7358 100644 --- a/server/src/test/java/org/opensearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/opensearch/index/codec/CodecTests.java @@ -53,7 +53,8 @@ import org.opensearch.plugins.MapperPlugin; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.IndexSettingsModule; -import org.apache.lucene.codec.experimental; +import org.apache.lucene.codecs.experimental.Lucene92CustomCodec; +import org.apache.lucene.codecs.experimental.Lucene90CustomStoredFieldsFormat; import java.io.IOException; import java.util.Collections; @@ -81,17 +82,17 @@ public void testBestCompression() throws Exception { public void testZstdCompression() throws Exception { Codec codec = createCodecService().codec("zstd"); - assertStoredFieldsCompressionEquals(Lucene92CustomCodec.Mode.ZSTD, codec); + assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.ZSTD, codec); } public void testZstdNoDictCompression() throws Exception { Codec codec = createCodecService().codec("zstd_no_dict"); - assertStoredFieldsCompressionEquals(Lucene92CustomCodec.Mode.ZSTD_NO_DICT, codec); + assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.ZSTD_NO_DICT, codec); } public void testLz4NativeCompression() throws Exception { Codec codec = createCodecService().codec("lz4_native"); - assertStoredFieldsCompressionEquals(Lucene92CustomCodec.Mode.LZ4_NATIVE, codec); + assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.LZ4_NATIVE, codec); } // write some docs with it, inspect .si to see this was the used compression @@ -112,7 +113,7 @@ private void assertStoredFieldsCompressionEquals(Lucene92Codec.Mode expected, Co dir.close(); } - 
private void assertStoredFieldsCompressionEquals(Lucene92CustomCodec.Mode expected, Codec actual) throws Exception { + private void assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode expected, Codec actual) throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(null); iwc.setCodec(actual); From df93e0aef4293e9496ed01c731b8491c9f59ec6c Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Wed, 15 Jun 2022 06:49:51 +0000 Subject: [PATCH 04/25] Fix forbidden api violation error for lz4-java. Signed-off-by: Mulugeta Mammo --- server/build.gradle | 7 ++++++- .../experimental/Lucene90CustomStoredFieldsFormat.java | 3 --- .../lucene/codecs/experimental/Lucene92CustomCodec.java | 2 -- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/server/build.gradle b/server/build.gradle index 8f6ba220ec2c3..8c38ee8081720 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -134,7 +134,7 @@ dependencies { // jna api "net.java.dev.jna:jna:${versions.jna}" - // lz4 native and zstd compressions + // lz4 (native) and zstd api "org.lz4:lz4-java:${versions.lz4}" api "com.github.luben:zstd-jni:${versions.zstd}" @@ -225,6 +225,11 @@ tasks.named("processResources").configure { dependsOn generateModulesList, generatePluginsList } +// lz4-java uses sun.misc.Unsafe in the UnsafeUtils class. 
+thirdPartyAudit.ignoreViolations( + 'net.jpountz.util.UnsafeUtils' +) + tasks.named("thirdPartyAudit").configure { ignoreMissingClasses( // from com.fasterxml.jackson.dataformat.yaml.YAMLMapper (jackson-dataformat-yaml) diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java index dcfa9c5c83e48..99c640f34e067 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java @@ -91,7 +91,6 @@ StoredFieldsFormat impl(Lucene92CustomCodec.Mode mode) { switch (mode) { case ZSTD: return new Lucene90CompressingStoredFieldsFormat("CustomStoredFieldsZstd", ZSTD_MODE, ZSTD_BLOCK_LENGTH, 4096, 10); - case ZSTD_NO_DICT: return new Lucene90CompressingStoredFieldsFormat( "CustomStoredFieldsZstdNoDict", @@ -100,10 +99,8 @@ StoredFieldsFormat impl(Lucene92CustomCodec.Mode mode) { 4096, 10 ); - case LZ4_NATIVE: return new Lucene90CompressingStoredFieldsFormat("CustomStoredFieldsLz4", LZ4_MODE, LZ4_NATIVE_BLOCK_LENGTH, 1024, 10); - default: throw new AssertionError(); } diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java index 7aeb46e0d0b06..c3f8b85d88755 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java @@ -76,7 +76,6 @@ public Lucene92CustomCodec(Mode compressionMode, int compressionLevel) { this.storedFieldsFormat = new Lucene90CustomStoredFieldsFormat(Mode.ZSTD, compressionLevel); break; - case ZSTD_NO_DICT: if (this.compressionLevel < 1 || this.compressionLevel > 22) throw new IllegalArgumentException( "Invalid 
compression level" @@ -84,7 +83,6 @@ public Lucene92CustomCodec(Mode compressionMode, int compressionLevel) { this.storedFieldsFormat = new Lucene90CustomStoredFieldsFormat(Mode.ZSTD_NO_DICT, compressionLevel); break; - case LZ4_NATIVE: this.storedFieldsFormat = new Lucene90CustomStoredFieldsFormat(Mode.LZ4_NATIVE, compressionLevel); break; From b0cf47e2613657f51c5eaced4ccb883bc7bb6bc4 Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Wed, 15 Jun 2022 15:25:44 +0000 Subject: [PATCH 05/25] Fix license headers. Remove and fix unnecessary fields. Signed-off-by: Mulugeta Mammo --- .../experimental/LZ4CompressionMode.java | 27 +----------- .../Lucene90CustomStoredFieldsFormat.java | 43 +++++-------------- .../experimental/Lucene92CustomCodec.java | 37 ++-------------- ...rFieldMappingPostingFormatCustomCodec.java | 25 +---------- .../experimental/ZstdCompressionMode.java | 29 ++----------- .../ZstdNoDictCompressionMode.java | 27 ++---------- 6 files changed, 23 insertions(+), 165 deletions(-) diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java index ec585421ea365..bb7620f46ff8b 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java @@ -6,30 +6,6 @@ * compatible open source license. */ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - package org.apache.lucene.codecs.experimental; import java.io.IOException; @@ -46,6 +22,7 @@ /** LZ4 JNI based Compression Mode */ public class LZ4CompressionMode extends CompressionMode { + private static final int NUM_SUB_BLOCKS = 10; /** default constructor */ @@ -101,8 +78,6 @@ public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOEx public void close() throws IOException {} } - // decompression - /** LZ4 decompressor */ private static final class LZ4InnerDecompressor extends Decompressor { diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java index 99c640f34e067..b0554971f596e 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java @@ -6,30 +6,6 @@ * compatible open source license. */ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - package org.apache.lucene.codecs.experimental; import java.io.IOException; @@ -46,14 +22,17 @@ /** Stored field format used by plugaable codec */ public class Lucene90CustomStoredFieldsFormat extends StoredFieldsFormat { + private static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024; private static final int LZ4_NATIVE_BLOCK_LENGTH = 10 * 8 * 1024; - public static final String MODE_KEY = Lucene90CustomStoredFieldsFormat.class.getSimpleName() + ".mode"; + private final CompressionMode ZSTD_MODE; + private final CompressionMode ZSTD_MODE_NO_DICT; + private final CompressionMode LZ4_MODE; - final Lucene92CustomCodec.Mode mode; + private Lucene92CustomCodec.Mode mode; - private int compressionLevel; + public static final String MODE_KEY = Lucene90CustomStoredFieldsFormat.class.getSimpleName() + ".mode"; /** default constructor */ public Lucene90CustomStoredFieldsFormat() { @@ -63,7 +42,9 @@ public Lucene90CustomStoredFieldsFormat() { /** Stored fields format with specified compression algo. 
*/ public Lucene90CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode, int compressionLevel) { this.mode = Objects.requireNonNull(mode); - this.compressionLevel = compressionLevel; + ZSTD_MODE = new ZstdCompressionMode(compressionLevel); + ZSTD_MODE_NO_DICT = new ZstdNoDictCompressionMode(compressionLevel); + LZ4_MODE = new LZ4CompressionMode(); } @Override @@ -87,7 +68,7 @@ public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOCo return impl(mode).fieldsWriter(directory, si, context); } - StoredFieldsFormat impl(Lucene92CustomCodec.Mode mode) { + private StoredFieldsFormat impl(Lucene92CustomCodec.Mode mode) { switch (mode) { case ZSTD: return new Lucene90CompressingStoredFieldsFormat("CustomStoredFieldsZstd", ZSTD_MODE, ZSTD_BLOCK_LENGTH, 4096, 10); @@ -105,8 +86,4 @@ StoredFieldsFormat impl(Lucene92CustomCodec.Mode mode) { throw new AssertionError(); } } - - public final CompressionMode ZSTD_MODE = new ZstdCompressionMode(compressionLevel); - public final CompressionMode ZSTD_MODE_NO_DICT = new ZstdNoDictCompressionMode(compressionLevel); - public final CompressionMode LZ4_MODE = new LZ4CompressionMode(); } diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java index c3f8b85d88755..02afeb9a4759a 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java @@ -6,30 +6,6 @@ * compatible open source license. */ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - package org.apache.lucene.codecs.experimental; import org.apache.lucene.codecs.FilterCodec; @@ -39,9 +15,9 @@ /** Custom codec for different compression algorithm */ public class Lucene92CustomCodec extends FilterCodec { - public static final int defaultCompressionLevel = 6; private final StoredFieldsFormat storedFieldsFormat; - private int compressionLevel; + + public static final int defaultCompressionLevel = 6; /** Compression modes */ public static enum Mode { @@ -66,20 +42,15 @@ public Lucene92CustomCodec(Mode compressionMode) { /** new codec for a given compression algorithm and compression level */ public Lucene92CustomCodec(Mode compressionMode, int compressionLevel) { super("Lucene92CustomCodec", new Lucene92Codec()); - this.compressionLevel = compressionLevel; switch (compressionMode) { case ZSTD: - if (this.compressionLevel < 1 || this.compressionLevel > 22) throw new IllegalArgumentException( - "Invalid compression level" - ); + if (compressionLevel < 1 || compressionLevel > 22) throw new IllegalArgumentException("Invalid compression level"); this.storedFieldsFormat = new Lucene90CustomStoredFieldsFormat(Mode.ZSTD, compressionLevel); break; case ZSTD_NO_DICT: - if (this.compressionLevel < 1 || this.compressionLevel > 22) throw new IllegalArgumentException( - "Invalid compression level" - ); + if (compressionLevel < 1 || compressionLevel > 22) throw new IllegalArgumentException("Invalid compression level"); 
this.storedFieldsFormat = new Lucene90CustomStoredFieldsFormat(Mode.ZSTD_NO_DICT, compressionLevel); break; diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java b/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java index a396cba76f643..a91c96a4b635e 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java @@ -6,35 +6,12 @@ * compatible open source license. */ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. 
- */ - package org.apache.lucene.codecs.experimental; import org.opensearch.index.mapper.MapperService; public class PerFieldMappingPostingFormatCustomCodec extends Lucene92CustomCodec { + private final MapperService mapperService; public PerFieldMappingPostingFormatCustomCodec(Lucene92CustomCodec.Mode compressionMode, MapperService mapperService) { diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java index a861ff22690e4..25bbbe44a9d8e 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java @@ -6,30 +6,6 @@ * compatible open source license. */ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. 
- */ - package org.apache.lucene.codecs.experimental; import com.github.luben.zstd.*; @@ -44,11 +20,14 @@ /** Zstandard Compression Mode */ public class ZstdCompressionMode extends CompressionMode { - public static final int defaultLevel = 6; + private static final int NUM_SUB_BLOCKS = 10; private static final int DICT_SIZE_FACTOR = 6; + private final int level; + public static final int defaultLevel = 6; + /** default constructor */ ZstdCompressionMode() { this.level = defaultLevel; diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java index f5800775beb4b..18074b562d54b 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java @@ -6,30 +6,6 @@ * compatible open source license. */ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. 
- */ - package org.apache.lucene.codecs.experimental; import com.github.luben.zstd.Zstd; @@ -44,8 +20,11 @@ /** Zstandard Compression Mode */ public class ZstdNoDictCompressionMode extends CompressionMode { + private static final int NUM_SUB_BLOCKS = 10; + private final int level; + public static final int defaultLevel = 6; /** default constructor */ From 975cfd7d4a61cc333381b40801143c564539a923 Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Tue, 21 Jun 2022 06:59:36 +0000 Subject: [PATCH 06/25] Fix magic numbers. Use more restrictive access modifiers. Signed-off-by: Mulugeta Mammo --- .../experimental/LZ4CompressionMode.java | 16 +++++----- .../Lucene90CustomStoredFieldsFormat.java | 30 ++++++++++++++----- .../experimental/Lucene92CustomCodec.java | 8 ++--- .../experimental/ZstdCompressionMode.java | 27 ++++++++--------- 4 files changed, 48 insertions(+), 33 deletions(-) diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java index bb7620f46ff8b..14f7cc27272ce 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java @@ -40,8 +40,8 @@ public Decompressor newDecompressor() { /** LZ4 compressor */ private static final class LZ4InnerCompressor extends Compressor { - byte[] compressedBuffer; - LZ4Compressor compressor; + private byte[] compressedBuffer; + private LZ4Compressor compressor; /** Default constructor */ public LZ4InnerCompressor() { @@ -81,12 +81,12 @@ public void close() throws IOException {} /** LZ4 decompressor */ private static final class LZ4InnerDecompressor extends Decompressor { - byte[] compressed; - LZ4FastDecompressor decompressor; + private byte[] compressedBuffer; + private LZ4FastDecompressor decompressor; /** default decompressor */ public LZ4InnerDecompressor() { - compressed = 
BytesRef.EMPTY_BYTES; + compressedBuffer = BytesRef.EMPTY_BYTES; decompressor = LZ4Factory.nativeInstance().fastDecompressor(); } @@ -119,15 +119,15 @@ public void decompress(DataInput in, int originalLength, int offset, int length, if (compressedLength == 0) { return; } - compressed = ArrayUtil.grow(compressed, compressedLength); - in.readBytes(compressed, 0, compressedLength); + compressedBuffer = ArrayUtil.grow(compressedBuffer, compressedLength); + in.readBytes(compressedBuffer, 0, compressedLength); int l = Math.min(blockLength, originalLength - offsetInBlock); bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + l); byte[] output = new byte[l]; - decompressor.decompress(compressed, 0, output, 0, l); + decompressor.decompress(compressedBuffer, 0, output, 0, l); System.arraycopy(output, 0, bytes.bytes, bytes.length, l); bytes.length += l; diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java index b0554971f596e..da646ab9623d0 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java @@ -25,18 +25,22 @@ public class Lucene90CustomStoredFieldsFormat extends StoredFieldsFormat { private static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024; private static final int LZ4_NATIVE_BLOCK_LENGTH = 10 * 8 * 1024; + private static final int ZSTD_MAX_DOCS_PER_BLOCK = 4096; + private static final int ZSTD_BLOCK_SHIFT = 10; + private static final int LZ4_MAX_DOCS_PER_BLOCK = 1024; + private static final int LZ4_BLOCK_SHIFT = 10; private final CompressionMode ZSTD_MODE; private final CompressionMode ZSTD_MODE_NO_DICT; private final CompressionMode LZ4_MODE; - private Lucene92CustomCodec.Mode mode; + private final Lucene92CustomCodec.Mode mode; public static final String 
MODE_KEY = Lucene90CustomStoredFieldsFormat.class.getSimpleName() + ".mode"; /** default constructor */ public Lucene90CustomStoredFieldsFormat() { - this(Lucene92CustomCodec.Mode.LZ4_NATIVE, Lucene92CustomCodec.defaultCompressionLevel); + this(Lucene92CustomCodec.Mode.LZ4_NATIVE, Lucene92CustomCodec.DEFAULT_COMPRESSION_LEVEL); } /** Stored fields format with specified compression algo. */ @@ -62,7 +66,7 @@ public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOCo String previous = si.putAttribute(MODE_KEY, mode.name()); if (previous != null && previous.equals(mode.name()) == false) { throw new IllegalStateException( - "found existing value for " + MODE_KEY + " for segment: " + si.name + "old=" + previous + ", new=" + mode.name() + "found existing value for " + MODE_KEY + " for segment: " + si.name + " old = " + previous + ", new = " + mode.name() ); } return impl(mode).fieldsWriter(directory, si, context); @@ -71,17 +75,29 @@ public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOCo private StoredFieldsFormat impl(Lucene92CustomCodec.Mode mode) { switch (mode) { case ZSTD: - return new Lucene90CompressingStoredFieldsFormat("CustomStoredFieldsZstd", ZSTD_MODE, ZSTD_BLOCK_LENGTH, 4096, 10); + return new Lucene90CompressingStoredFieldsFormat( + "CustomStoredFieldsZstd", + ZSTD_MODE, + ZSTD_BLOCK_LENGTH, + ZSTD_MAX_DOCS_PER_BLOCK, + ZSTD_BLOCK_SHIFT + ); case ZSTD_NO_DICT: return new Lucene90CompressingStoredFieldsFormat( "CustomStoredFieldsZstdNoDict", ZSTD_MODE_NO_DICT, ZSTD_BLOCK_LENGTH, - 4096, - 10 + ZSTD_MAX_DOCS_PER_BLOCK, + ZSTD_BLOCK_SHIFT ); case LZ4_NATIVE: - return new Lucene90CompressingStoredFieldsFormat("CustomStoredFieldsLz4", LZ4_MODE, LZ4_NATIVE_BLOCK_LENGTH, 1024, 10); + return new Lucene90CompressingStoredFieldsFormat( + "CustomStoredFieldsLz4", + LZ4_MODE, + LZ4_NATIVE_BLOCK_LENGTH, + LZ4_MAX_DOCS_PER_BLOCK, + LZ4_BLOCK_SHIFT + ); default: throw new AssertionError(); } diff --git 
a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java index 02afeb9a4759a..99a8c82e35513 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java @@ -15,9 +15,9 @@ /** Custom codec for different compression algorithm */ public class Lucene92CustomCodec extends FilterCodec { - private final StoredFieldsFormat storedFieldsFormat; + public static final int DEFAULT_COMPRESSION_LEVEL = 6; - public static final int defaultCompressionLevel = 6; + private final StoredFieldsFormat storedFieldsFormat; /** Compression modes */ public static enum Mode { @@ -31,12 +31,12 @@ public static enum Mode { /** Default codec */ public Lucene92CustomCodec() { - this(Mode.LZ4_NATIVE, defaultCompressionLevel); + this(Mode.LZ4_NATIVE, DEFAULT_COMPRESSION_LEVEL); } /** new codec for a given compression algorithm and default compression level */ public Lucene92CustomCodec(Mode compressionMode) { - this(compressionMode, defaultCompressionLevel); + this(compressionMode, DEFAULT_COMPRESSION_LEVEL); } /** new codec for a given compression algorithm and compression level */ diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java index 25bbbe44a9d8e..e83e566dd9597 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java @@ -23,24 +23,23 @@ public class ZstdCompressionMode extends CompressionMode { private static final int NUM_SUB_BLOCKS = 10; private static final int DICT_SIZE_FACTOR = 6; + private static final int DEFAULT_COMPRESSION_LEVEL = 6; - private final int level; - - public static final int 
defaultLevel = 6; + private final int compressionLevel; /** default constructor */ ZstdCompressionMode() { - this.level = defaultLevel; + this.compressionLevel = DEFAULT_COMPRESSION_LEVEL; } /** compression mode for a given compression level */ - ZstdCompressionMode(int level) { - this.level = level; + ZstdCompressionMode(int compressionLevel) { + this.compressionLevel = compressionLevel; } @Override public Compressor newCompressor() { - return new ZSTDCompressor(level); + return new ZSTDCompressor(compressionLevel); } @Override @@ -51,8 +50,8 @@ public Decompressor newDecompressor() { /** zstandard compressor */ private static final class ZSTDCompressor extends Compressor { - int compressionLevel; - byte[] compressedBuffer; + private final int compressionLevel; + private byte[] compressedBuffer; /** compressor with a given compresion level */ public ZSTDCompressor(int compressionLevel) { @@ -105,11 +104,11 @@ public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOEx /** zstandard decompressor */ private static final class ZSTDDecompressor extends Decompressor { - byte[] compressed; + private byte[] compressedBuffer; /** default decompressor */ public ZSTDDecompressor() { - compressed = BytesRef.EMPTY_BYTES; + compressedBuffer = BytesRef.EMPTY_BYTES; } /*resuable decompress function*/ @@ -119,11 +118,11 @@ private void doDecompress(DataInput in, ZstdDecompressCtx dctx, BytesRef bytes, return; } - compressed = ArrayUtil.grow(compressed, compressedLength); - in.readBytes(compressed, 0, compressedLength); + compressedBuffer = ArrayUtil.grow(compressedBuffer, compressedLength); + in.readBytes(compressedBuffer, 0, compressedLength); bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + decompressedLen); - int uncompressed = dctx.decompressByteArray(bytes.bytes, bytes.length, decompressedLen, compressed, 0, compressedLength); + int uncompressed = dctx.decompressByteArray(bytes.bytes, bytes.length, decompressedLen, compressedBuffer, 0, 
compressedLength); if (decompressedLen != uncompressed) { throw new IllegalStateException(decompressedLen + " " + uncompressed); From bdb928720eaf2f8350e787399e348a4f33f23710 Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Tue, 21 Jun 2022 07:53:18 +0000 Subject: [PATCH 07/25] Use protected access modifier for Zstd and LZ4 compression mode classes. Signed-off-by: Mulugeta Mammo --- .../apache/lucene/codecs/experimental/LZ4CompressionMode.java | 2 +- .../lucene/codecs/experimental/ZstdCompressionMode.java | 4 ++-- .../lucene/codecs/experimental/ZstdNoDictCompressionMode.java | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java index 14f7cc27272ce..38fb70905cf5a 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java @@ -26,7 +26,7 @@ public class LZ4CompressionMode extends CompressionMode { private static final int NUM_SUB_BLOCKS = 10; /** default constructor */ - public LZ4CompressionMode() {} + protected LZ4CompressionMode() {} @Override public Compressor newCompressor() { diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java index e83e566dd9597..f558ba0756cd2 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java @@ -28,12 +28,12 @@ public class ZstdCompressionMode extends CompressionMode { private final int compressionLevel; /** default constructor */ - ZstdCompressionMode() { + protected ZstdCompressionMode() { this.compressionLevel = DEFAULT_COMPRESSION_LEVEL; } /** compression mode for a 
given compression level */ - ZstdCompressionMode(int compressionLevel) { + protected ZstdCompressionMode(int compressionLevel) { this.compressionLevel = compressionLevel; } diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java index 18074b562d54b..1a6f6f22ce99d 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java @@ -28,12 +28,12 @@ public class ZstdNoDictCompressionMode extends CompressionMode { public static final int defaultLevel = 6; /** default constructor */ - ZstdNoDictCompressionMode() { + protected ZstdNoDictCompressionMode() { this.level = defaultLevel; } /** compression mode for a given compression level */ - ZstdNoDictCompressionMode(int level) { + protected ZstdNoDictCompressionMode(int level) { this.level = level; } From 1d6bbbbf7ebfb2f93f9bfc229f8a5049ad557d23 Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Tue, 21 Jun 2022 18:36:17 +0000 Subject: [PATCH 08/25] Allow negative compression levels for zstd. Use more restrictive access modifiers. 
Signed-off-by: Mulugeta Mammo --- .../codecs/experimental/LZ4CompressionMode.java | 4 ++-- .../experimental/Lucene92CustomCodec.java | 11 +++++++++-- .../experimental/ZstdNoDictCompressionMode.java | 17 ++++++++--------- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java index 38fb70905cf5a..19c2930ccb72c 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java @@ -41,7 +41,7 @@ public Decompressor newDecompressor() { /** LZ4 compressor */ private static final class LZ4InnerCompressor extends Compressor { private byte[] compressedBuffer; - private LZ4Compressor compressor; + private final LZ4Compressor compressor; /** Default constructor */ public LZ4InnerCompressor() { @@ -82,7 +82,7 @@ public void close() throws IOException {} private static final class LZ4InnerDecompressor extends Decompressor { private byte[] compressedBuffer; - private LZ4FastDecompressor decompressor; + private final LZ4FastDecompressor decompressor; /** default decompressor */ public LZ4InnerDecompressor() { diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java index 99a8c82e35513..c52e3bc8a2499 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java @@ -17,6 +17,9 @@ public class Lucene92CustomCodec extends FilterCodec { public static final int DEFAULT_COMPRESSION_LEVEL = 6; + private static final int ZSTD_MIN_CLEVEL = -(1 << 17); + private static final int ZSTD_MAX_CLEVEL = 22; + private final StoredFieldsFormat storedFieldsFormat; 
/** Compression modes */ @@ -45,12 +48,16 @@ public Lucene92CustomCodec(Mode compressionMode, int compressionLevel) { switch (compressionMode) { case ZSTD: - if (compressionLevel < 1 || compressionLevel > 22) throw new IllegalArgumentException("Invalid compression level"); + if (compressionLevel < ZSTD_MIN_CLEVEL || compressionLevel > ZSTD_MAX_CLEVEL) throw new IllegalArgumentException( + "Invalid compression level" + ); this.storedFieldsFormat = new Lucene90CustomStoredFieldsFormat(Mode.ZSTD, compressionLevel); break; case ZSTD_NO_DICT: - if (compressionLevel < 1 || compressionLevel > 22) throw new IllegalArgumentException("Invalid compression level"); + if (compressionLevel < ZSTD_MIN_CLEVEL || compressionLevel > ZSTD_MAX_CLEVEL) throw new IllegalArgumentException( + "Invalid compression level" + ); this.storedFieldsFormat = new Lucene90CustomStoredFieldsFormat(Mode.ZSTD_NO_DICT, compressionLevel); break; diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java index 1a6f6f22ce99d..c342b1e2d014c 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java @@ -22,24 +22,23 @@ public class ZstdNoDictCompressionMode extends CompressionMode { private static final int NUM_SUB_BLOCKS = 10; + private static final int DEFAULT_COMPRESSION_LEVEL = 6; - private final int level; - - public static final int defaultLevel = 6; + private final int compressionLevel; /** default constructor */ protected ZstdNoDictCompressionMode() { - this.level = defaultLevel; + this.compressionLevel = DEFAULT_COMPRESSION_LEVEL; } /** compression mode for a given compression level */ - protected ZstdNoDictCompressionMode(int level) { - this.level = level; + protected ZstdNoDictCompressionMode(int compressionLevel) { + 
this.compressionLevel = compressionLevel; } @Override public Compressor newCompressor() { - return new ZSTDCompressor(level); + return new ZSTDCompressor(compressionLevel); } @Override @@ -50,7 +49,7 @@ public Decompressor newDecompressor() { /** zstandard compressor */ private static final class ZSTDCompressor extends Compressor { - int compressionLevel; + private final int compressionLevel; byte[] compressedBuffer; /** compressor with a given compresion level */ @@ -100,7 +99,7 @@ public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOEx /** zstandard decompressor */ private static final class ZSTDDecompressor extends Decompressor { - byte[] compressed; + private byte[] compressed; /** default decompressor */ public ZSTDDecompressor() { From c6b39fac4c8b9eed779f406d21a2220809b0587d Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Thu, 23 Jun 2022 20:43:25 +0000 Subject: [PATCH 09/25] Use a more restrictive permission for loading zstd-jni and lz4-java libraries. Signed-off-by: Mulugeta Mammo --- .../resources/org/opensearch/bootstrap/security.policy | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/server/src/main/resources/org/opensearch/bootstrap/security.policy b/server/src/main/resources/org/opensearch/bootstrap/security.policy index 1197c1dbd94da..ffe301793e89a 100644 --- a/server/src/main/resources/org/opensearch/bootstrap/security.policy +++ b/server/src/main/resources/org/opensearch/bootstrap/security.policy @@ -25,6 +25,7 @@ * under the License. */ + /* * Modifications Copyright OpenSearch Contributors. See * GitHub history for details. 
@@ -79,6 +80,7 @@ grant codeBase "${codebase.jna}" { permission java.lang.RuntimePermission "accessDeclaredMembers"; }; + //// Everything else: grant { @@ -182,6 +184,7 @@ grant { permission java.io.FilePermission "/sys/fs/cgroup/memory", "read"; permission java.io.FilePermission "/sys/fs/cgroup/memory/-", "read"; - // ZSTD and lz4-java permissions - permission java.lang.RuntimePermission "*"; + // needed by zstd-jni and lz4-java + permission java.lang.RuntimePermission "loadLibrary.*"; }; + From 1050738544a2fd0ac3cbbc99c038aa45f1afd29c Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Fri, 24 Jun 2022 00:12:49 +0000 Subject: [PATCH 10/25] Rename a file (follow a consistent version naming convention). Signed-off-by: Mulugeta Mammo --- .../lucene/codecs/experimental/Lucene92CustomCodec.java | 6 +++--- ...sFormat.java => Lucene92CustomStoredFieldsFormat.java} | 8 ++++---- .../test/java/org/opensearch/index/codec/CodecTests.java | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) rename server/src/main/java/org/apache/lucene/codecs/experimental/{Lucene90CustomStoredFieldsFormat.java => Lucene92CustomStoredFieldsFormat.java} (94%) diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java index c52e3bc8a2499..1fecc6b0d8f4b 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java @@ -52,17 +52,17 @@ public Lucene92CustomCodec(Mode compressionMode, int compressionLevel) { "Invalid compression level" ); - this.storedFieldsFormat = new Lucene90CustomStoredFieldsFormat(Mode.ZSTD, compressionLevel); + this.storedFieldsFormat = new Lucene92CustomStoredFieldsFormat(Mode.ZSTD, compressionLevel); break; case ZSTD_NO_DICT: if (compressionLevel < ZSTD_MIN_CLEVEL || compressionLevel > ZSTD_MAX_CLEVEL) throw new 
IllegalArgumentException( "Invalid compression level" ); - this.storedFieldsFormat = new Lucene90CustomStoredFieldsFormat(Mode.ZSTD_NO_DICT, compressionLevel); + this.storedFieldsFormat = new Lucene92CustomStoredFieldsFormat(Mode.ZSTD_NO_DICT, compressionLevel); break; case LZ4_NATIVE: - this.storedFieldsFormat = new Lucene90CustomStoredFieldsFormat(Mode.LZ4_NATIVE, compressionLevel); + this.storedFieldsFormat = new Lucene92CustomStoredFieldsFormat(Mode.LZ4_NATIVE, compressionLevel); break; default: throw new IllegalArgumentException("Chosen compression mode does not exist"); diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomStoredFieldsFormat.java similarity index 94% rename from server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java rename to server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomStoredFieldsFormat.java index da646ab9623d0..85ea93b95cc19 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene90CustomStoredFieldsFormat.java +++ b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomStoredFieldsFormat.java @@ -21,7 +21,7 @@ import org.apache.lucene.store.IOContext; /** Stored field format used by plugaable codec */ -public class Lucene90CustomStoredFieldsFormat extends StoredFieldsFormat { +public class Lucene92CustomStoredFieldsFormat extends StoredFieldsFormat { private static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024; private static final int LZ4_NATIVE_BLOCK_LENGTH = 10 * 8 * 1024; @@ -36,15 +36,15 @@ public class Lucene90CustomStoredFieldsFormat extends StoredFieldsFormat { private final Lucene92CustomCodec.Mode mode; - public static final String MODE_KEY = Lucene90CustomStoredFieldsFormat.class.getSimpleName() + ".mode"; + public static final String MODE_KEY = 
Lucene92CustomStoredFieldsFormat.class.getSimpleName() + ".mode"; /** default constructor */ - public Lucene90CustomStoredFieldsFormat() { + public Lucene92CustomStoredFieldsFormat() { this(Lucene92CustomCodec.Mode.LZ4_NATIVE, Lucene92CustomCodec.DEFAULT_COMPRESSION_LEVEL); } /** Stored fields format with specified compression algo. */ - public Lucene90CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode, int compressionLevel) { + public Lucene92CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode, int compressionLevel) { this.mode = Objects.requireNonNull(mode); ZSTD_MODE = new ZstdCompressionMode(compressionLevel); ZSTD_MODE_NO_DICT = new ZstdNoDictCompressionMode(compressionLevel); diff --git a/server/src/test/java/org/opensearch/index/codec/CodecTests.java b/server/src/test/java/org/opensearch/index/codec/CodecTests.java index 9e7f59bbe7358..3189a05096841 100644 --- a/server/src/test/java/org/opensearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/opensearch/index/codec/CodecTests.java @@ -54,7 +54,7 @@ import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.IndexSettingsModule; import org.apache.lucene.codecs.experimental.Lucene92CustomCodec; -import org.apache.lucene.codecs.experimental.Lucene90CustomStoredFieldsFormat; +import org.apache.lucene.codecs.experimental.Lucene92CustomStoredFieldsFormat; import java.io.IOException; import java.util.Collections; @@ -123,7 +123,7 @@ private void assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode iw.close(); DirectoryReader ir = DirectoryReader.open(dir); SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader(); - String v = sr.getSegmentInfo().info.getAttribute(Lucene90CustomStoredFieldsFormat.MODE_KEY); + String v = sr.getSegmentInfo().info.getAttribute(Lucene92CustomStoredFieldsFormat.MODE_KEY); assertNotNull(v); assertEquals(expected, Lucene92CustomCodec.Mode.valueOf(v)); ir.close(); From cbfba87800631e8059da583b625eff443428b5b1 Mon Sep 17 00:00:00 2001 
From: Mulugeta Mammo Date: Wed, 6 Jul 2022 18:13:14 +0000 Subject: [PATCH 11/25] Refactor and create a new custom-codecs sandbox module. Signed-off-by: Mulugeta Mammo --- buildSrc/version.properties | 2 - modules/transport-netty4/build.gradle | 6 + plugins/transport-nio/build.gradle | 6 + sandbox/modules/custom-codecs/build.gradle | 35 +++++ .../licenses/lz4-java-1.8.0.jar.sha1 | 0 .../licenses/lz4-java-LICENSE.txt | 0 .../licenses/lz4-java-NOTICE.txt | 0 .../licenses/zstd-jni-1.5.2-3.jar.sha1 | 0 .../licenses/zstd-jni-LICENSE.txt | 0 .../licenses/zstd-jni-NOTICE.txt | 0 .../codec/customcodec/CustomCodecPlugin.java | 30 ++++ .../codec/customcodec/CustomCodecService.java | 63 ++++++++ .../CustomCodecServiceFactory.java | 20 +++ .../customcodec/Lucene92CustomCodec.java | 141 ++++++++++++++++++ .../Lucene92CustomStoredFieldsFormat.java | 41 ++--- .../index/codec/customcodec/Lz4Codec.java | 26 ++++ .../codec/customcodec/Lz4CompressionMode.java | 10 +- .../PerFieldMappingPostingFormatCodec.java | 45 ++++++ .../index/codec/customcodec/ZstdCodec.java | 27 ++++ .../customcodec}/ZstdCompressionMode.java | 2 +- .../codec/customcodec/ZstdNoDictCodec.java | 26 ++++ .../ZstdNoDictCompressionMode.java | 4 +- .../index/codec/customcodec/package-info.java | 12 ++ .../services/org.apache.lucene.codecs.Codec | 3 + .../codec/customcodecs/CustomCodecTests.java | 88 +++++++++++ server/build.gradle | 9 -- .../experimental/Lucene92CustomCodec.java | 81 ---------- ...rFieldMappingPostingFormatCustomCodec.java | 21 --- .../opensearch/index/codec/CodecService.java | 15 -- .../opensearch/index/engine/EngineConfig.java | 3 - .../services/org.apache.lucene.codecs.Codec | 1 - .../opensearch/index/codec/CodecTests.java | 34 ----- 32 files changed, 559 insertions(+), 192 deletions(-) create mode 100644 sandbox/modules/custom-codecs/build.gradle rename {server => sandbox/modules/custom-codecs}/licenses/lz4-java-1.8.0.jar.sha1 (100%) rename {server => 
sandbox/modules/custom-codecs}/licenses/lz4-java-LICENSE.txt (100%) rename {server => sandbox/modules/custom-codecs}/licenses/lz4-java-NOTICE.txt (100%) rename {server => sandbox/modules/custom-codecs}/licenses/zstd-jni-1.5.2-3.jar.sha1 (100%) rename {server => sandbox/modules/custom-codecs}/licenses/zstd-jni-LICENSE.txt (100%) rename {server => sandbox/modules/custom-codecs}/licenses/zstd-jni-NOTICE.txt (100%) create mode 100644 sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java create mode 100644 sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java create mode 100644 sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecServiceFactory.java create mode 100644 sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java rename {server/src/main/java/org/apache/lucene/codecs/experimental => sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec}/Lucene92CustomStoredFieldsFormat.java (77%) create mode 100644 sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java rename server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java => sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4CompressionMode.java (94%) create mode 100644 sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java create mode 100644 sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java rename {server/src/main/java/org/apache/lucene/codecs/experimental => sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec}/ZstdCompressionMode.java (99%) create mode 100644 
sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java rename {server/src/main/java/org/apache/lucene/codecs/experimental => sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec}/ZstdNoDictCompressionMode.java (98%) create mode 100644 sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/package-info.java create mode 100644 sandbox/modules/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec create mode 100644 sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java delete mode 100644 server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java delete mode 100644 server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java delete mode 100644 server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 81bacf6702d7c..b43d9b1bce8d8 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -17,8 +17,6 @@ supercsv = 2.4.0 log4j = 2.17.1 slf4j = 1.6.2 asm = 9.3 -lz4 = 1.8.0 -zstd = 1.5.2-3 # when updating the JNA version, also update the version in buildSrc/build.gradle jna = 5.5.0 diff --git a/modules/transport-netty4/build.gradle b/modules/transport-netty4/build.gradle index a9517b2b3d2e5..74448e7a5ac06 100644 --- a/modules/transport-netty4/build.gradle +++ b/modules/transport-netty4/build.gradle @@ -182,6 +182,7 @@ thirdPartyAudit { 'org.slf4j.LoggerFactory', 'org.slf4j.spi.LocationAwareLogger', + 'com.github.luben.zstd.Zstd', 'com.google.protobuf.ExtensionRegistryLite', 'com.google.protobuf.MessageLiteOrBuilder', 'com.google.protobuf.nano.CodedOutputByteBufferNano', @@ -198,6 +199,11 @@ thirdPartyAudit { 'com.ning.compress.lzf.util.ChunkDecoderFactory', 'com.ning.compress.lzf.util.ChunkEncoderFactory', 
'lzma.sdk.lzma.Encoder', + 'net.jpountz.lz4.LZ4Compressor', + 'net.jpountz.lz4.LZ4Factory', + 'net.jpountz.lz4.LZ4FastDecompressor', + 'net.jpountz.xxhash.XXHash32', + 'net.jpountz.xxhash.XXHashFactory', 'io.netty.internal.tcnative.AsyncSSLPrivateKeyMethod', 'io.netty.internal.tcnative.AsyncTask', 'io.netty.internal.tcnative.CertificateCallback', diff --git a/plugins/transport-nio/build.gradle b/plugins/transport-nio/build.gradle index 5e2168bed6254..a7e8c42a4e2d3 100644 --- a/plugins/transport-nio/build.gradle +++ b/plugins/transport-nio/build.gradle @@ -110,6 +110,7 @@ thirdPartyAudit { 'org.slf4j.LoggerFactory', 'org.slf4j.spi.LocationAwareLogger', + 'com.github.luben.zstd.Zstd', 'com.google.protobuf.ExtensionRegistryLite', 'com.google.protobuf.MessageLiteOrBuilder', 'com.google.protobuf.nano.CodedOutputByteBufferNano', @@ -126,6 +127,11 @@ thirdPartyAudit { 'com.ning.compress.lzf.util.ChunkDecoderFactory', 'com.ning.compress.lzf.util.ChunkEncoderFactory', 'lzma.sdk.lzma.Encoder', + 'net.jpountz.lz4.LZ4Compressor', + 'net.jpountz.lz4.LZ4Factory', + 'net.jpountz.lz4.LZ4FastDecompressor', + 'net.jpountz.xxhash.XXHash32', + 'net.jpountz.xxhash.XXHashFactory', 'org.eclipse.jetty.alpn.ALPN$ClientProvider', 'org.eclipse.jetty.alpn.ALPN$ServerProvider', 'org.eclipse.jetty.alpn.ALPN', diff --git a/sandbox/modules/custom-codecs/build.gradle b/sandbox/modules/custom-codecs/build.gradle new file mode 100644 index 0000000000000..3307aa84bf8cd --- /dev/null +++ b/sandbox/modules/custom-codecs/build.gradle @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +apply plugin: 'opensearch.opensearchplugin' +apply plugin: 'opensearch.yaml-rest-test' + +opensearchplugin { + name 'custom-codecs' + description 'A plugin that implements zstd (with and without dictionary) and lz4 (native) compression algorithms.' + classname 'org.opensearch.index.codec.customcodec.CustomCodecPlugin' + licenseFile rootProject.file('licenses/APACHE-LICENSE-2.0.txt') + noticeFile rootProject.file('NOTICE.txt') +} + +dependencies { + api "org.lz4:lz4-java:1.8.0" + api "com.github.luben:zstd-jni:1.5.2-3" +} + +// lz4-java uses sun.misc.Unsafe in the UnsafeUtils class. +thirdPartyAudit.ignoreViolations( + 'net.jpountz.util.UnsafeUtils' +) + +yamlRestTest.enabled = false; +testingConventions.enabled = false; + diff --git a/server/licenses/lz4-java-1.8.0.jar.sha1 b/sandbox/modules/custom-codecs/licenses/lz4-java-1.8.0.jar.sha1 similarity index 100% rename from server/licenses/lz4-java-1.8.0.jar.sha1 rename to sandbox/modules/custom-codecs/licenses/lz4-java-1.8.0.jar.sha1 diff --git a/server/licenses/lz4-java-LICENSE.txt b/sandbox/modules/custom-codecs/licenses/lz4-java-LICENSE.txt similarity index 100% rename from server/licenses/lz4-java-LICENSE.txt rename to sandbox/modules/custom-codecs/licenses/lz4-java-LICENSE.txt diff --git a/server/licenses/lz4-java-NOTICE.txt b/sandbox/modules/custom-codecs/licenses/lz4-java-NOTICE.txt similarity index 100% rename from server/licenses/lz4-java-NOTICE.txt rename to sandbox/modules/custom-codecs/licenses/lz4-java-NOTICE.txt diff --git a/server/licenses/zstd-jni-1.5.2-3.jar.sha1 b/sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.2-3.jar.sha1 similarity index 100% rename from server/licenses/zstd-jni-1.5.2-3.jar.sha1 rename to sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.2-3.jar.sha1 diff --git a/server/licenses/zstd-jni-LICENSE.txt b/sandbox/modules/custom-codecs/licenses/zstd-jni-LICENSE.txt similarity index 100% rename from server/licenses/zstd-jni-LICENSE.txt rename to 
sandbox/modules/custom-codecs/licenses/zstd-jni-LICENSE.txt diff --git a/server/licenses/zstd-jni-NOTICE.txt b/sandbox/modules/custom-codecs/licenses/zstd-jni-NOTICE.txt similarity index 100% rename from server/licenses/zstd-jni-NOTICE.txt rename to sandbox/modules/custom-codecs/licenses/zstd-jni-NOTICE.txt diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java new file mode 100644 index 0000000000000..4c19e0a748d5b --- /dev/null +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.customcodec; + +import org.opensearch.plugins.Plugin; +import org.opensearch.plugins.EnginePlugin; +import org.opensearch.index.codec.CodecServiceFactory; +import org.opensearch.index.IndexSettings; + +import java.util.Optional; + +/** + * A plugin that implements zstd (with and without dictionary) and lz4 (native) compression algorithms. 
+ */ +public class CustomCodecPlugin extends Plugin implements EnginePlugin { + /** + * @param indexSettings the settings of the index the codec service is created for + * @return the custom codec service factory + */ + @Override + public Optional getCustomCodecServiceFactory(final IndexSettings indexSettings) { + return Optional.of(new CustomCodecServiceFactory()); + } +} diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java new file mode 100644 index 0000000000000..178f88b3e809d --- /dev/null +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java @@ -0,0 +1,63 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.customcodec; + +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.opensearch.common.collect.MapBuilder; +import org.opensearch.index.codec.CodecService; +import org.opensearch.index.mapper.MapperService; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Map; + +public class CustomCodecService extends CodecService { + private final Map codecs; + + public CustomCodecService(MapperService mapperService, Logger logger) { + super(mapperService, logger); + final MapBuilder codecs = MapBuilder.newMapBuilder(); + if (mapperService == null) { + codecs.put(Lucene92CustomCodec.Mode.ZSTD.name(), new ZstdCodec()); + codecs.put(Lucene92CustomCodec.Mode.ZSTDNODICT.name(), new ZstdNoDictCodec()); + codecs.put(Lucene92CustomCodec.Mode.LZ4.name(), new Lz4Codec()); + } else { + codecs.put( + Lucene92CustomCodec.Mode.ZSTD.name(), + new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.ZSTD, mapperService, logger) + ); + 
codecs.put( + Lucene92CustomCodec.Mode.ZSTDNODICT.name(), + new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.ZSTDNODICT, mapperService, logger) + ); + codecs.put( + Lucene92CustomCodec.Mode.LZ4.name(), + new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.LZ4, mapperService, logger) + ); + } + this.codecs = codecs.immutableMap(); + } + + @Override + public Codec codec(String name) { + Codec codec = super.codec(name); + if (codec == null) { + codec = codecs.get(name); + } + return codec; + } + + @Override + public String[] availableCodecs() { + ArrayList ac = new ArrayList(Arrays.asList(super.availableCodecs())); + ac.addAll(codecs.keySet()); + return ac.toArray(new String[0]); + } +} diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecServiceFactory.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecServiceFactory.java new file mode 100644 index 0000000000000..567d54a0eee70 --- /dev/null +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecServiceFactory.java @@ -0,0 +1,20 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.codec.customcodec; + +import org.opensearch.index.codec.CodecService; +import org.opensearch.index.codec.CodecServiceConfig; +import org.opensearch.index.codec.CodecServiceFactory; + +public class CustomCodecServiceFactory implements CodecServiceFactory { + @Override + public CodecService createCodecService(CodecServiceConfig config) { + return new CustomCodecService(config.getMapperService(), config.getLogger()); + } +} diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java new file mode 100644 index 0000000000000..04fca0be3e7a5 --- /dev/null +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java @@ -0,0 +1,141 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.customcodec; + +import org.apache.lucene.codecs.*; +import org.apache.lucene.codecs.lucene90.*; +import org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; + +public class Lucene92CustomCodec extends Codec { + public static final int DEFAULT_COMPRESSION_LEVEL = 6; + + /** Each mode represents a compression algorithm. 
*/ + public enum Mode { + ZSTD, + ZSTDNODICT, + LZ4 + } + + private final TermVectorsFormat vectorsFormat = new Lucene90TermVectorsFormat(); + private final FieldInfosFormat fieldInfosFormat = new Lucene90FieldInfosFormat(); + private final SegmentInfoFormat segmentInfosFormat = new Lucene90SegmentInfoFormat(); + private final LiveDocsFormat liveDocsFormat = new Lucene90LiveDocsFormat(); + private final CompoundFormat compoundFormat = new Lucene90CompoundFormat(); + private final NormsFormat normsFormat = new Lucene90NormsFormat(); + + private final PostingsFormat defaultPostingsFormat; + private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { + @Override + public PostingsFormat getPostingsFormatForField(String field) { + return Lucene92CustomCodec.this.getPostingsFormatForField(field); + } + }; + + private final DocValuesFormat defaultDVFormat; + private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return Lucene92CustomCodec.this.getDocValuesFormatForField(field); + } + }; + + private final KnnVectorsFormat defaultKnnVectorsFormat; + private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() { + @Override + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + return Lucene92CustomCodec.this.getKnnVectorsFormatForField(field); + } + }; + + private final StoredFieldsFormat storedFieldsFormat; + + /** Default codec */ + public Lucene92CustomCodec() { + this(Mode.LZ4); + } + + /** new codec for a given compression algorithm and default compression level */ + public Lucene92CustomCodec(Mode mode) { + super(mode.name()); + this.storedFieldsFormat = new Lucene92CustomStoredFieldsFormat(mode); + this.defaultPostingsFormat = new Lucene90PostingsFormat(); + this.defaultDVFormat = new Lucene90DocValuesFormat(); + this.defaultKnnVectorsFormat = new Lucene92HnswVectorsFormat(); + } + + @Override + public final 
StoredFieldsFormat storedFieldsFormat() { + return storedFieldsFormat; + } + + @Override + public final TermVectorsFormat termVectorsFormat() { + return vectorsFormat; + } + + @Override + public final PostingsFormat postingsFormat() { + return postingsFormat; + } + + @Override + public final FieldInfosFormat fieldInfosFormat() { + return fieldInfosFormat; + } + + @Override + public final SegmentInfoFormat segmentInfoFormat() { + return segmentInfosFormat; + } + + @Override + public final LiveDocsFormat liveDocsFormat() { + return liveDocsFormat; + } + + @Override + public final CompoundFormat compoundFormat() { + return compoundFormat; + } + + @Override + public final PointsFormat pointsFormat() { + return new Lucene90PointsFormat(); + } + + @Override + public final KnnVectorsFormat knnVectorsFormat() { + return knnVectorsFormat; + } + + public PostingsFormat getPostingsFormatForField(String field) { + return defaultPostingsFormat; + } + + public DocValuesFormat getDocValuesFormatForField(String field) { + return defaultDVFormat; + } + + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + return defaultKnnVectorsFormat; + } + + @Override + public final DocValuesFormat docValuesFormat() { + return docValuesFormat; + } + + @Override + public final NormsFormat normsFormat() { + return normsFormat; + } +} diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomStoredFieldsFormat.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java similarity index 77% rename from server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomStoredFieldsFormat.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java index 85ea93b95cc19..a47f9b0b2ad3e 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomStoredFieldsFormat.java +++ 
b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.apache.lucene.codecs.experimental; +package org.opensearch.index.codec.customcodec; import java.io.IOException; import java.util.Objects; @@ -20,35 +20,40 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -/** Stored field format used by plugaable codec */ +/** Stored field format used by pluggable codec */ public class Lucene92CustomStoredFieldsFormat extends StoredFieldsFormat { + public static final String MODE_KEY = Lucene92CustomStoredFieldsFormat.class.getSimpleName() + ".mode"; + private static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024; - private static final int LZ4_NATIVE_BLOCK_LENGTH = 10 * 8 * 1024; private static final int ZSTD_MAX_DOCS_PER_BLOCK = 4096; private static final int ZSTD_BLOCK_SHIFT = 10; + + private static final int LZ4_BLOCK_LENGTH = 10 * 8 * 1024; private static final int LZ4_MAX_DOCS_PER_BLOCK = 1024; private static final int LZ4_BLOCK_SHIFT = 10; - private final CompressionMode ZSTD_MODE; - private final CompressionMode ZSTD_MODE_NO_DICT; - private final CompressionMode LZ4_MODE; + private final CompressionMode zstdCompressionMode; + private final CompressionMode zstdNoDictCompressionMode; + private final CompressionMode lz4CompressionMode; private final Lucene92CustomCodec.Mode mode; - public static final String MODE_KEY = Lucene92CustomStoredFieldsFormat.class.getSimpleName() + ".mode"; - /** default constructor */ public Lucene92CustomStoredFieldsFormat() { - this(Lucene92CustomCodec.Mode.LZ4_NATIVE, Lucene92CustomCodec.DEFAULT_COMPRESSION_LEVEL); + this(Lucene92CustomCodec.Mode.LZ4, Lucene92CustomCodec.DEFAULT_COMPRESSION_LEVEL); + } + + public Lucene92CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode) { + this(mode, Lucene92CustomCodec.DEFAULT_COMPRESSION_LEVEL); } /** Stored fields format with 
specified compression algo. */ public Lucene92CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode, int compressionLevel) { this.mode = Objects.requireNonNull(mode); - ZSTD_MODE = new ZstdCompressionMode(compressionLevel); - ZSTD_MODE_NO_DICT = new ZstdNoDictCompressionMode(compressionLevel); - LZ4_MODE = new LZ4CompressionMode(); + zstdCompressionMode = new ZstdCompressionMode(compressionLevel); + zstdNoDictCompressionMode = new ZstdNoDictCompressionMode(compressionLevel); + lz4CompressionMode = new Lz4CompressionMode(); } @Override @@ -77,24 +82,24 @@ private StoredFieldsFormat impl(Lucene92CustomCodec.Mode mode) { case ZSTD: return new Lucene90CompressingStoredFieldsFormat( "CustomStoredFieldsZstd", - ZSTD_MODE, + zstdCompressionMode, ZSTD_BLOCK_LENGTH, ZSTD_MAX_DOCS_PER_BLOCK, ZSTD_BLOCK_SHIFT ); - case ZSTD_NO_DICT: + case ZSTDNODICT: return new Lucene90CompressingStoredFieldsFormat( "CustomStoredFieldsZstdNoDict", - ZSTD_MODE_NO_DICT, + zstdNoDictCompressionMode, ZSTD_BLOCK_LENGTH, ZSTD_MAX_DOCS_PER_BLOCK, ZSTD_BLOCK_SHIFT ); - case LZ4_NATIVE: + case LZ4: return new Lucene90CompressingStoredFieldsFormat( "CustomStoredFieldsLz4", - LZ4_MODE, - LZ4_NATIVE_BLOCK_LENGTH, + lz4CompressionMode, + LZ4_BLOCK_LENGTH, LZ4_MAX_DOCS_PER_BLOCK, LZ4_BLOCK_SHIFT ); diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java new file mode 100644 index 0000000000000..e97a1bfde8426 --- /dev/null +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.codec.customcodec; + +/** Custom codec for different compression algorithm */ +public class Lz4Codec extends Lucene92CustomCodec { + + public Lz4Codec() { + this(DEFAULT_COMPRESSION_LEVEL); + } + + public Lz4Codec(int compressionLevel) { + super(Mode.LZ4); + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } +} diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4CompressionMode.java similarity index 94% rename from server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4CompressionMode.java index 19c2930ccb72c..c0d9395db990b 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/LZ4CompressionMode.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4CompressionMode.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.apache.lucene.codecs.experimental; +package org.opensearch.index.codec.customcodec; import java.io.IOException; import net.jpountz.lz4.LZ4Compressor; @@ -21,21 +21,21 @@ import org.apache.lucene.util.BytesRef; /** LZ4 JNI based Compression Mode */ -public class LZ4CompressionMode extends CompressionMode { +public class Lz4CompressionMode extends CompressionMode { private static final int NUM_SUB_BLOCKS = 10; /** default constructor */ - protected LZ4CompressionMode() {} + protected Lz4CompressionMode() {} @Override public Compressor newCompressor() { - return new LZ4CompressionMode.LZ4InnerCompressor(); + return new Lz4CompressionMode.LZ4InnerCompressor(); } @Override public Decompressor newDecompressor() { - return new LZ4CompressionMode.LZ4InnerDecompressor(); + return new Lz4CompressionMode.LZ4InnerDecompressor(); } /** LZ4 compressor */ diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java new file mode 100644 index 0000000000000..c3c763659bf63 --- /dev/null +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java @@ -0,0 +1,45 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.codec.customcodec; + +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; +import org.opensearch.index.mapper.CompletionFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MapperService; + +public class PerFieldMappingPostingFormatCodec extends Lucene92CustomCodec { + private final Logger logger; + private final MapperService mapperService; + private final DocValuesFormat dvFormat = new Lucene90DocValuesFormat(); + + public PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode compressionMode, MapperService mapperService, Logger logger) { + super(compressionMode); + this.mapperService = mapperService; + this.logger = logger; + } + + @Override + public PostingsFormat getPostingsFormatForField(String field) { + final MappedFieldType fieldType = mapperService.fieldType(field); + if (fieldType == null) { + logger.warn("no index mapper found for field: [{}] returning default postings format", field); + } else if (fieldType instanceof CompletionFieldMapper.CompletionFieldType) { + return CompletionFieldMapper.CompletionFieldType.postingsFormat(); + } + return super.getPostingsFormatForField(field); + } + + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return dvFormat; + } +} diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java new file mode 100644 index 0000000000000..834a07297a163 --- /dev/null +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be 
licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.customcodec; + +/** Custom codec for different compression algorithm */ +public class ZstdCodec extends Lucene92CustomCodec { + + /** new codec for a given compression algorithm and compression level */ + public ZstdCodec() { + this(DEFAULT_COMPRESSION_LEVEL); + } + + public ZstdCodec(int compressionLevel) { + super(Mode.ZSTD); + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } +} diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java similarity index 99% rename from server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java index f558ba0756cd2..e4e5c80c2005b 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdCompressionMode.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.apache.lucene.codecs.experimental; +package org.opensearch.index.codec.customcodec; import com.github.luben.zstd.*; import java.io.IOException; diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java new file mode 100644 index 0000000000000..fa0082d18133a --- /dev/null +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.customcodec; + +/** Custom codec for different compression algorithm */ +public class ZstdNoDictCodec extends Lucene92CustomCodec { + + public ZstdNoDictCodec() { + this(DEFAULT_COMPRESSION_LEVEL); + } + + public ZstdNoDictCodec(int compressionLevel) { + super(Mode.ZSTDNODICT); + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } +} diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java similarity index 98% rename from server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java index c342b1e2d014c..680fba66eb5ec 100644 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/ZstdNoDictCompressionMode.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.apache.lucene.codecs.experimental; +package org.opensearch.index.codec.customcodec; import com.github.luben.zstd.Zstd; import java.io.IOException; @@ -50,7 +50,7 @@ public Decompressor newDecompressor() { private static final class ZSTDCompressor extends Compressor { private final int compressionLevel; - byte[] compressedBuffer; + private byte[] compressedBuffer; /** compressor with a given compresion level */ public ZSTDCompressor(int compressionLevel) { diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/package-info.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/package-info.java new file mode 100644 index 0000000000000..943a814b25c2f --- /dev/null +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * A plugin that implements native codecs. 
+ */ +package org.opensearch.index.codec.customcodec; diff --git a/sandbox/modules/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/sandbox/modules/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec new file mode 100644 index 0000000000000..4df6c1db0637d --- /dev/null +++ b/sandbox/modules/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -0,0 +1,3 @@ +org.opensearch.index.codec.customcodec.Lz4Codec +org.opensearch.index.codec.customcodec.ZstdCodec +org.opensearch.index.codec.customcodec.ZstdNoDictCodec diff --git a/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java new file mode 100644 index 0000000000000..30d8625eea49b --- /dev/null +++ b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java @@ -0,0 +1,88 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.codec.customcodec; + +import org.apache.logging.log4j.LogManager; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.opensearch.common.settings.Settings; +import org.opensearch.env.Environment; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.codec.CodecService; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.similarity.SimilarityService; +import org.opensearch.indices.mapper.MapperRegistry; +import org.opensearch.plugins.MapperPlugin; +import org.opensearch.test.IndexSettingsModule; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.Collections; + +@LuceneTestCase.SuppressCodecs("*") // we test against default codec so never get a random one here! 
+public class CustomCodecTests extends OpenSearchTestCase { + + public void testZstdCompression() throws Exception { + Codec codec = createCodecService().codec("ZSTD"); + assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.ZSTD, codec); + } + + public void testZstdNoDictCompression() throws Exception { + Codec codec = createCodecService().codec("ZSTDNODICT"); + assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.ZSTDNODICT, codec); + } + + public void testLz4NativeCompression() throws Exception { + Codec codec = createCodecService().codec("LZ4"); + assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.LZ4, codec); + } + + private void assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode expected, Codec actual) throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(null); + iwc.setCodec(actual); + IndexWriter iw = new IndexWriter(dir, iwc); + iw.addDocument(new Document()); + iw.commit(); + iw.close(); + DirectoryReader ir = DirectoryReader.open(dir); + SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader(); + String v = sr.getSegmentInfo().info.getAttribute(Lucene92CustomStoredFieldsFormat.MODE_KEY); + assertNotNull(v); + assertEquals(expected, Lucene92CustomCodec.Mode.valueOf(v)); + ir.close(); + dir.close(); + } + + private CodecService createCodecService() throws IOException { + Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); + IndexSettings settings = IndexSettingsModule.newIndexSettings("_na", nodeSettings); + SimilarityService similarityService = new SimilarityService(settings, null, Collections.emptyMap()); + IndexAnalyzers indexAnalyzers = createTestAnalysis(settings, nodeSettings).indexAnalyzers; + MapperRegistry mapperRegistry = new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER); + MapperService service = new MapperService( + settings, 
+ indexAnalyzers, + xContentRegistry(), + similarityService, + mapperRegistry, + () -> null, + () -> false, + null + ); + return new CodecService(service, LogManager.getLogger("test")); + } +} diff --git a/server/build.gradle b/server/build.gradle index 8c38ee8081720..9d9d12e798eab 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -134,10 +134,6 @@ dependencies { // jna api "net.java.dev.jna:jna:${versions.jna}" - // lz4 (native) and zstd - api "org.lz4:lz4-java:${versions.lz4}" - api "com.github.luben:zstd-jni:${versions.zstd}" - testImplementation(project(":test:framework")) { // tests use the locally compiled version of server exclude group: 'org.opensearch', module: 'server' @@ -225,11 +221,6 @@ tasks.named("processResources").configure { dependsOn generateModulesList, generatePluginsList } -// lz4-java uses sun.misc.Unsafe in the UnsafeUtils class. -thirdPartyAudit.ignoreViolations( - 'net.jpountz.util.UnsafeUtils' -) - tasks.named("thirdPartyAudit").configure { ignoreMissingClasses( // from com.fasterxml.jackson.dataformat.yaml.YAMLMapper (jackson-dataformat-yaml) diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java b/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java deleted file mode 100644 index 1fecc6b0d8f4b..0000000000000 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/Lucene92CustomCodec.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.apache.lucene.codecs.experimental; - -import org.apache.lucene.codecs.FilterCodec; -import org.apache.lucene.codecs.StoredFieldsFormat; -import org.apache.lucene.codecs.lucene92.Lucene92Codec; - -/** Custom codec for different compression algorithm */ -public class Lucene92CustomCodec extends FilterCodec { - - public static final int DEFAULT_COMPRESSION_LEVEL = 6; - - private static final int ZSTD_MIN_CLEVEL = -(1 << 17); - private static final int ZSTD_MAX_CLEVEL = 22; - - private final StoredFieldsFormat storedFieldsFormat; - - /** Compression modes */ - public static enum Mode { - // Zstandard with dictionary - ZSTD, - // Zstandard without dictionary - ZSTD_NO_DICT, - // lz4 native - LZ4_NATIVE - } - - /** Default codec */ - public Lucene92CustomCodec() { - this(Mode.LZ4_NATIVE, DEFAULT_COMPRESSION_LEVEL); - } - - /** new codec for a given compression algorithm and default compression level */ - public Lucene92CustomCodec(Mode compressionMode) { - this(compressionMode, DEFAULT_COMPRESSION_LEVEL); - } - - /** new codec for a given compression algorithm and compression level */ - public Lucene92CustomCodec(Mode compressionMode, int compressionLevel) { - super("Lucene92CustomCodec", new Lucene92Codec()); - - switch (compressionMode) { - case ZSTD: - if (compressionLevel < ZSTD_MIN_CLEVEL || compressionLevel > ZSTD_MAX_CLEVEL) throw new IllegalArgumentException( - "Invalid compression level" - ); - - this.storedFieldsFormat = new Lucene92CustomStoredFieldsFormat(Mode.ZSTD, compressionLevel); - break; - case ZSTD_NO_DICT: - if (compressionLevel < ZSTD_MIN_CLEVEL || compressionLevel > ZSTD_MAX_CLEVEL) throw new IllegalArgumentException( - "Invalid compression level" - ); - - this.storedFieldsFormat = new Lucene92CustomStoredFieldsFormat(Mode.ZSTD_NO_DICT, compressionLevel); - break; - case LZ4_NATIVE: - this.storedFieldsFormat = new Lucene92CustomStoredFieldsFormat(Mode.LZ4_NATIVE, compressionLevel); - break; - default: - throw new 
IllegalArgumentException("Chosen compression mode does not exist"); - } - } - - @Override - public StoredFieldsFormat storedFieldsFormat() { - return storedFieldsFormat; - } - - @Override - public String toString() { - return getClass().getSimpleName(); - } -} diff --git a/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java b/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java deleted file mode 100644 index a91c96a4b635e..0000000000000 --- a/server/src/main/java/org/apache/lucene/codecs/experimental/PerFieldMappingPostingFormatCustomCodec.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.apache.lucene.codecs.experimental; - -import org.opensearch.index.mapper.MapperService; - -public class PerFieldMappingPostingFormatCustomCodec extends Lucene92CustomCodec { - - private final MapperService mapperService; - - public PerFieldMappingPostingFormatCustomCodec(Lucene92CustomCodec.Mode compressionMode, MapperService mapperService) { - super(compressionMode); - this.mapperService = mapperService; - } -} diff --git a/server/src/main/java/org/opensearch/index/codec/CodecService.java b/server/src/main/java/org/opensearch/index/codec/CodecService.java index c16deb31703c6..ff254a63fadb6 100644 --- a/server/src/main/java/org/opensearch/index/codec/CodecService.java +++ b/server/src/main/java/org/opensearch/index/codec/CodecService.java @@ -36,8 +36,6 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.lucene92.Lucene92Codec; import org.apache.lucene.codecs.lucene92.Lucene92Codec.Mode; -import org.apache.lucene.codecs.experimental.Lucene92CustomCodec; -import org.apache.lucene.codecs.experimental.PerFieldMappingPostingFormatCustomCodec; import 
org.opensearch.common.Nullable; import org.opensearch.common.collect.MapBuilder; import org.opensearch.index.mapper.MapperService; @@ -60,28 +58,15 @@ public class CodecService { public static final String BEST_COMPRESSION_CODEC = "best_compression"; /** the raw unfiltered lucene default. useful for testing */ public static final String LUCENE_DEFAULT_CODEC = "lucene_default"; - /** zstd (with and without dictionary) and lz4 (native) compression */ - public static final String ZSTD_CODEC = "zstd"; - public static final String ZSTD_NO_DICT_CODEC = "zstd_no_dict"; - public static final String LZ4_NATIVE_CODEC = "lz4_native"; public CodecService(@Nullable MapperService mapperService, Logger logger) { final MapBuilder codecs = MapBuilder.newMapBuilder(); if (mapperService == null) { codecs.put(DEFAULT_CODEC, new Lucene92Codec()); codecs.put(BEST_COMPRESSION_CODEC, new Lucene92Codec(Mode.BEST_COMPRESSION)); - codecs.put(ZSTD_CODEC, new Lucene92CustomCodec(Lucene92CustomCodec.Mode.ZSTD)); - codecs.put(ZSTD_NO_DICT_CODEC, new Lucene92CustomCodec(Lucene92CustomCodec.Mode.ZSTD_NO_DICT)); - codecs.put(LZ4_NATIVE_CODEC, new Lucene92CustomCodec(Lucene92CustomCodec.Mode.LZ4_NATIVE)); } else { codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); - codecs.put(ZSTD_CODEC, new PerFieldMappingPostingFormatCustomCodec(Lucene92CustomCodec.Mode.ZSTD, mapperService)); - codecs.put( - ZSTD_NO_DICT_CODEC, - new PerFieldMappingPostingFormatCustomCodec(Lucene92CustomCodec.Mode.ZSTD_NO_DICT, mapperService) - ); - codecs.put(LZ4_NATIVE_CODEC, new PerFieldMappingPostingFormatCustomCodec(Lucene92CustomCodec.Mode.LZ4_NATIVE, mapperService)); } codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); for (String codec : Codec.availableCodecs()) { diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java 
b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index dea9175c886df..4ae6646ed14f0 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -123,9 +123,6 @@ public Supplier retentionLeasesSupplier() { case "default": case "best_compression": case "lucene_default": - case "zstd": - case "zstd_no_dict": - case "lz4_native": return s; default: if (Codec.availableCodecs().contains(s) == false) { // we don't error message the not officially supported ones diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec deleted file mode 100644 index cda4862523bd3..0000000000000 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ /dev/null @@ -1 +0,0 @@ -org.apache.lucene.codecs.experimental.Lucene92CustomCodec diff --git a/server/src/test/java/org/opensearch/index/codec/CodecTests.java b/server/src/test/java/org/opensearch/index/codec/CodecTests.java index 3189a05096841..0275066f9af1b 100644 --- a/server/src/test/java/org/opensearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/opensearch/index/codec/CodecTests.java @@ -53,8 +53,6 @@ import org.opensearch.plugins.MapperPlugin; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.IndexSettingsModule; -import org.apache.lucene.codecs.experimental.Lucene92CustomCodec; -import org.apache.lucene.codecs.experimental.Lucene92CustomStoredFieldsFormat; import java.io.IOException; import java.util.Collections; @@ -80,21 +78,6 @@ public void testBestCompression() throws Exception { assertStoredFieldsCompressionEquals(Lucene92Codec.Mode.BEST_COMPRESSION, codec); } - public void testZstdCompression() throws Exception { - Codec codec = createCodecService().codec("zstd"); - assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.ZSTD, codec); 
- } - - public void testZstdNoDictCompression() throws Exception { - Codec codec = createCodecService().codec("zstd_no_dict"); - assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.ZSTD_NO_DICT, codec); - } - - public void testLz4NativeCompression() throws Exception { - Codec codec = createCodecService().codec("lz4_native"); - assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.LZ4_NATIVE, codec); - } - // write some docs with it, inspect .si to see this was the used compression private void assertStoredFieldsCompressionEquals(Lucene92Codec.Mode expected, Codec actual) throws Exception { Directory dir = newDirectory(); @@ -113,23 +96,6 @@ private void assertStoredFieldsCompressionEquals(Lucene92Codec.Mode expected, Co dir.close(); } - private void assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode expected, Codec actual) throws Exception { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(null); - iwc.setCodec(actual); - IndexWriter iw = new IndexWriter(dir, iwc); - iw.addDocument(new Document()); - iw.commit(); - iw.close(); - DirectoryReader ir = DirectoryReader.open(dir); - SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader(); - String v = sr.getSegmentInfo().info.getAttribute(Lucene92CustomStoredFieldsFormat.MODE_KEY); - assertNotNull(v); - assertEquals(expected, Lucene92CustomCodec.Mode.valueOf(v)); - ir.close(); - dir.close(); - } - private CodecService createCodecService() throws IOException { Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); IndexSettings settings = IndexSettingsModule.newIndexSettings("_na", nodeSettings); From 3390e085c632f17206d13825c5a95eeeace57794 Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Wed, 6 Jul 2022 19:22:37 +0000 Subject: [PATCH 12/25] Remove blank lines. 
Signed-off-by: Mulugeta Mammo --- .../src/main/resources/org/opensearch/bootstrap/security.policy | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/main/resources/org/opensearch/bootstrap/security.policy b/server/src/main/resources/org/opensearch/bootstrap/security.policy index ffe301793e89a..87b6d392382a8 100644 --- a/server/src/main/resources/org/opensearch/bootstrap/security.policy +++ b/server/src/main/resources/org/opensearch/bootstrap/security.policy @@ -25,7 +25,6 @@ * under the License. */ - /* * Modifications Copyright OpenSearch Contributors. See * GitHub history for details. @@ -80,7 +79,6 @@ grant codeBase "${codebase.jna}" { permission java.lang.RuntimePermission "accessDeclaredMembers"; }; - //// Everything else: grant { From a168c722f39d50095b337be50fcd37724a402843 Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Wed, 6 Jul 2022 21:08:39 +0000 Subject: [PATCH 13/25] Restore Lucene92CustomCodec to extend from FilterCodec. Signed-off-by: Mulugeta Mammo --- .../codec/customcodec/CustomCodecService.java | 6 +- .../customcodec/Lucene92CustomCodec.java | 111 ++---------------- .../PerFieldMappingPostingFormatCodec.java | 28 +---- .../org/opensearch/bootstrap/security.policy | 1 - 4 files changed, 15 insertions(+), 131 deletions(-) diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java index 178f88b3e809d..17123ca585348 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java @@ -31,15 +31,15 @@ public CustomCodecService(MapperService mapperService, Logger logger) { } else { codecs.put( Lucene92CustomCodec.Mode.ZSTD.name(), - new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.ZSTD, 
mapperService, logger) + new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.ZSTD, mapperService) ); codecs.put( Lucene92CustomCodec.Mode.ZSTDNODICT.name(), - new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.ZSTDNODICT, mapperService, logger) + new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.ZSTDNODICT, mapperService) ); codecs.put( Lucene92CustomCodec.Mode.LZ4.name(), - new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.LZ4, mapperService, logger) + new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.LZ4, mapperService) ); } this.codecs = codecs.immutableMap(); diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java index 04fca0be3e7a5..16d45fe9009d9 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java @@ -9,13 +9,9 @@ package org.opensearch.index.codec.customcodec; import org.apache.lucene.codecs.*; -import org.apache.lucene.codecs.lucene90.*; -import org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsFormat; -import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; -import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; -import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; +import org.apache.lucene.codecs.lucene92.Lucene92Codec; -public class Lucene92CustomCodec extends Codec { +public class Lucene92CustomCodec extends FilterCodec { public static final int DEFAULT_COMPRESSION_LEVEL = 6; /** Each mode represents a compression algorithm. 
*/ @@ -25,37 +21,6 @@ public enum Mode { LZ4 } - private final TermVectorsFormat vectorsFormat = new Lucene90TermVectorsFormat(); - private final FieldInfosFormat fieldInfosFormat = new Lucene90FieldInfosFormat(); - private final SegmentInfoFormat segmentInfosFormat = new Lucene90SegmentInfoFormat(); - private final LiveDocsFormat liveDocsFormat = new Lucene90LiveDocsFormat(); - private final CompoundFormat compoundFormat = new Lucene90CompoundFormat(); - private final NormsFormat normsFormat = new Lucene90NormsFormat(); - - private final PostingsFormat defaultPostingsFormat; - private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { - @Override - public PostingsFormat getPostingsFormatForField(String field) { - return Lucene92CustomCodec.this.getPostingsFormatForField(field); - } - }; - - private final DocValuesFormat defaultDVFormat; - private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { - @Override - public DocValuesFormat getDocValuesFormatForField(String field) { - return Lucene92CustomCodec.this.getDocValuesFormatForField(field); - } - }; - - private final KnnVectorsFormat defaultKnnVectorsFormat; - private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return Lucene92CustomCodec.this.getKnnVectorsFormatForField(field); - } - }; - private final StoredFieldsFormat storedFieldsFormat; /** Default codec */ @@ -65,77 +30,21 @@ public Lucene92CustomCodec() { /** new codec for a given compression algorithm and default compression level */ public Lucene92CustomCodec(Mode mode) { - super(mode.name()); - this.storedFieldsFormat = new Lucene92CustomStoredFieldsFormat(mode); - this.defaultPostingsFormat = new Lucene90PostingsFormat(); - this.defaultDVFormat = new Lucene90DocValuesFormat(); - this.defaultKnnVectorsFormat = new Lucene92HnswVectorsFormat(); - } - - @Override - public final StoredFieldsFormat 
storedFieldsFormat() { - return storedFieldsFormat; + this(mode, DEFAULT_COMPRESSION_LEVEL); } - @Override - public final TermVectorsFormat termVectorsFormat() { - return vectorsFormat; + public Lucene92CustomCodec(Mode mode, int compressionLevel) { + super(mode.name(), new Lucene92Codec()); + this.storedFieldsFormat = new Lucene92CustomStoredFieldsFormat(mode, compressionLevel); } @Override - public final PostingsFormat postingsFormat() { - return postingsFormat; - } - - @Override - public final FieldInfosFormat fieldInfosFormat() { - return fieldInfosFormat; - } - - @Override - public final SegmentInfoFormat segmentInfoFormat() { - return segmentInfosFormat; - } - - @Override - public final LiveDocsFormat liveDocsFormat() { - return liveDocsFormat; - } - - @Override - public final CompoundFormat compoundFormat() { - return compoundFormat; - } - - @Override - public final PointsFormat pointsFormat() { - return new Lucene90PointsFormat(); - } - - @Override - public final KnnVectorsFormat knnVectorsFormat() { - return knnVectorsFormat; - } - - public PostingsFormat getPostingsFormatForField(String field) { - return defaultPostingsFormat; - } - - public DocValuesFormat getDocValuesFormatForField(String field) { - return defaultDVFormat; - } - - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return defaultKnnVectorsFormat; - } - - @Override - public final DocValuesFormat docValuesFormat() { - return docValuesFormat; + public StoredFieldsFormat storedFieldsFormat() { + return storedFieldsFormat; } @Override - public final NormsFormat normsFormat() { - return normsFormat; + public String toString() { + return getClass().getSimpleName(); } } diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java index c3c763659bf63..94be3b384eccb 100644 --- 
a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java @@ -8,38 +8,14 @@ package org.opensearch.index.codec.customcodec; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.codecs.DocValuesFormat; -import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; -import org.opensearch.index.mapper.CompletionFieldMapper; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.MapperService; public class PerFieldMappingPostingFormatCodec extends Lucene92CustomCodec { - private final Logger logger; + private final MapperService mapperService; - private final DocValuesFormat dvFormat = new Lucene90DocValuesFormat(); - public PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode compressionMode, MapperService mapperService, Logger logger) { + public PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode compressionMode, MapperService mapperService) { super(compressionMode); this.mapperService = mapperService; - this.logger = logger; - } - - @Override - public PostingsFormat getPostingsFormatForField(String field) { - final MappedFieldType fieldType = mapperService.fieldType(field); - if (fieldType == null) { - logger.warn("no index mapper found for field: [{}] returning default postings format", field); - } else if (fieldType instanceof CompletionFieldMapper.CompletionFieldType) { - return CompletionFieldMapper.CompletionFieldType.postingsFormat(); - } - return super.getPostingsFormatForField(field); - } - - @Override - public DocValuesFormat getDocValuesFormatForField(String field) { - return dvFormat; } } diff --git a/server/src/main/resources/org/opensearch/bootstrap/security.policy b/server/src/main/resources/org/opensearch/bootstrap/security.policy index 
87b6d392382a8..06767166e88d5 100644 --- a/server/src/main/resources/org/opensearch/bootstrap/security.policy +++ b/server/src/main/resources/org/opensearch/bootstrap/security.policy @@ -185,4 +185,3 @@ grant { // needed by zstd-jni and lz4-java permission java.lang.RuntimePermission "loadLibrary.*"; }; - From 1491da7e93e47f7058a43c8dcd91da452441b331 Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Thu, 7 Jul 2022 02:53:07 +0000 Subject: [PATCH 14/25] Make use of the compressionLevel argument. Signed-off-by: Mulugeta Mammo --- .../java/org/opensearch/index/codec/customcodec/Lz4Codec.java | 2 +- .../java/org/opensearch/index/codec/customcodec/ZstdCodec.java | 2 +- .../org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java index e97a1bfde8426..8943b2db6e698 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java @@ -16,7 +16,7 @@ public Lz4Codec() { } public Lz4Codec(int compressionLevel) { - super(Mode.LZ4); + super(Mode.LZ4, compressionLevel); } @Override diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java index 834a07297a163..f909514013d15 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java @@ -17,7 +17,7 @@ public ZstdCodec() { } public ZstdCodec(int compressionLevel) { - super(Mode.ZSTD); + super(Mode.ZSTD, compressionLevel); 
} @Override diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java index fa0082d18133a..54c1bfbd07540 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java @@ -16,7 +16,7 @@ public ZstdNoDictCodec() { } public ZstdNoDictCodec(int compressionLevel) { - super(Mode.ZSTDNODICT); + super(Mode.ZSTDNODICT, compressionLevel); } @Override From f74f011de31290a3e66f709347ffd1e01c26d912 Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Thu, 7 Jul 2022 13:19:00 +0000 Subject: [PATCH 15/25] Make Lucene92CustomCodec abstract and use a package-private access modifier. Signed-off-by: Mulugeta Mammo --- .../index/codec/customcodec/Lucene92CustomCodec.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java index 16d45fe9009d9..c8fefadb6cac7 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java @@ -11,7 +11,7 @@ import org.apache.lucene.codecs.*; import org.apache.lucene.codecs.lucene92.Lucene92Codec; -public class Lucene92CustomCodec extends FilterCodec { +abstract class Lucene92CustomCodec extends FilterCodec { public static final int DEFAULT_COMPRESSION_LEVEL = 6; /** Each mode represents a compression algorithm. 
*/ @@ -23,11 +23,6 @@ public enum Mode { private final StoredFieldsFormat storedFieldsFormat; - /** Default codec */ - public Lucene92CustomCodec() { - this(Mode.LZ4); - } - /** new codec for a given compression algorithm and default compression level */ public Lucene92CustomCodec(Mode mode) { this(mode, DEFAULT_COMPRESSION_LEVEL); From bdbe4098aa0d4e9f73cf2d82d7f764d673bb35db Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Thu, 7 Jul 2022 15:34:15 +0000 Subject: [PATCH 16/25] Fix missing JavaDoc issues. Remove unused field in PerFieldMappingPostingFormatCodec. Signed-off-by: Mulugeta Mammo --- .../index/codec/customcodec/CustomCodecPlugin.java | 4 ++++ .../index/codec/customcodec/CustomCodecService.java | 9 +++++++++ .../customcodec/CustomCodecServiceFactory.java | 7 +++++++ .../Lucene92CustomStoredFieldsFormat.java | 13 ++++++++++++- .../index/codec/customcodec/Lz4Codec.java | 13 +++++++------ .../PerFieldMappingPostingFormatCodec.java | 10 +++++++--- .../index/codec/customcodec/ZstdCodec.java | 13 +++++++++++-- .../codec/customcodec/ZstdCompressionMode.java | 6 +++++- .../index/codec/customcodec/ZstdNoDictCodec.java | 12 +++++++++++- .../customcodec/ZstdNoDictCompressionMode.java | 8 ++++++-- 10 files changed, 79 insertions(+), 16 deletions(-) diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java index 4c19e0a748d5b..0acea7fdd7446 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java @@ -19,6 +19,10 @@ * A plugin that implements zstd (with and without dictionary) and lz4 (native) compression algorithms. 
*/ public class CustomCodecPlugin extends Plugin implements EnginePlugin { + + /** Creates a new instance */ + public CustomCodecPlugin() {} + /** * @param indexSettings is the default indexSettings * @return the engine factory diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java index 17123ca585348..e2ded1dc4ef7c 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java @@ -18,9 +18,18 @@ import java.util.Arrays; import java.util.Map; +/** + * CustomCodecService provides ZSTD, ZSTDNODICT, and LZ4 compressors to be used in use-case specific file formats. + */ public class CustomCodecService extends CodecService { private final Map codecs; + /** + * Creates a new CustomCodecService. + * + * @param mapperService A mapper service. + * @param logger A logger. 
+ */ public CustomCodecService(MapperService mapperService, Logger logger) { super(mapperService, logger); final MapBuilder codecs = MapBuilder.newMapBuilder(); diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecServiceFactory.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecServiceFactory.java index 567d54a0eee70..135daa7519565 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecServiceFactory.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecServiceFactory.java @@ -12,7 +12,14 @@ import org.opensearch.index.codec.CodecServiceConfig; import org.opensearch.index.codec.CodecServiceFactory; +/** + * A factory for creating new {@link CodecService} instance + */ public class CustomCodecServiceFactory implements CodecServiceFactory { + + /** Creates a new instance. */ + public CustomCodecServiceFactory() {} + @Override public CodecService createCodecService(CodecServiceConfig config) { return new CustomCodecService(config.getMapperService(), config.getLogger()); diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java index a47f9b0b2ad3e..db6979e41e31e 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java @@ -23,6 +23,7 @@ /** Stored field format used by pluggable codec */ public class Lucene92CustomStoredFieldsFormat extends StoredFieldsFormat { + /** A key that we use to map to a mode */ public static final String MODE_KEY = 
Lucene92CustomStoredFieldsFormat.class.getSimpleName() + ".mode"; private static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024; @@ -44,11 +45,21 @@ public Lucene92CustomStoredFieldsFormat() { this(Lucene92CustomCodec.Mode.LZ4, Lucene92CustomCodec.DEFAULT_COMPRESSION_LEVEL); } + /** + * Creates a new instance. + * + * @param mode The mode represents ZSTD, ZSTDNODICT, or LZ4. + */ public Lucene92CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode) { this(mode, Lucene92CustomCodec.DEFAULT_COMPRESSION_LEVEL); } - /** Stored fields format with specified compression algo. */ + /** + * Creates a new instance with the specified mode and compression level. + * + * @param mode The mode represents ZSTD, ZSTDNODICT, or LZ4. + * @param compressionLevel The compression level for the mode. + */ public Lucene92CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode, int compressionLevel) { this.mode = Objects.requireNonNull(mode); zstdCompressionMode = new ZstdCompressionMode(compressionLevel); diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java index 8943b2db6e698..db7f71f1a0846 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java @@ -8,15 +8,16 @@ package org.opensearch.index.codec.customcodec; -/** Custom codec for different compression algorithm */ +/** + * Lz4Codec provides a native LZ4 implementation based on the lz4-java library. + */ public class Lz4Codec extends Lucene92CustomCodec { + /** + * Creates a new Lz4Codec instance. 
+ */ public Lz4Codec() { - this(DEFAULT_COMPRESSION_LEVEL); - } - - public Lz4Codec(int compressionLevel) { - super(Mode.LZ4, compressionLevel); + super(Mode.LZ4); } @Override diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java index 94be3b384eccb..346ed776acd0e 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java @@ -10,12 +10,16 @@ import org.opensearch.index.mapper.MapperService; +/** PerFieldMappingPostingFormatCodec. {@link org.opensearch.index.codec.PerFieldMappingPostingFormatCodec} */ public class PerFieldMappingPostingFormatCodec extends Lucene92CustomCodec { - private final MapperService mapperService; - + /** + * Creates a new instance. + * + * @param compressionMode The compression mode (ZSTD, ZSTDNODICT, or LZ4). + * @param mapperService The mapper service. 
+ */ public PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode compressionMode, MapperService mapperService) { super(compressionMode); - this.mapperService = mapperService; } } diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java index f909514013d15..cc3f7ad4792f7 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java @@ -8,14 +8,23 @@ package org.opensearch.index.codec.customcodec; -/** Custom codec for different compression algorithm */ +/** + * ZstdCodec provides ZSTD compressor using the zstd-jni library. + */ public class ZstdCodec extends Lucene92CustomCodec { - /** new codec for a given compression algorithm and compression level */ + /** + * Creates a new ZstdCodec instance with the default compression level. + */ public ZstdCodec() { this(DEFAULT_COMPRESSION_LEVEL); } + /** + * Creates a new ZstdCodec instance. + * + * @param compressionLevel The compression level. + */ public ZstdCodec(int compressionLevel) { super(Mode.ZSTD, compressionLevel); } diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java index e4e5c80c2005b..d515fcb83f7d7 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java @@ -32,7 +32,11 @@ protected ZstdCompressionMode() { this.compressionLevel = DEFAULT_COMPRESSION_LEVEL; } - /** compression mode for a given compression level */ + /** + * Creates a new instance. 
+ * + * @param compressionLevel The compression level to use. + */ protected ZstdCompressionMode(int compressionLevel) { this.compressionLevel = compressionLevel; } diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java index 54c1bfbd07540..d0c1e08495a28 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java @@ -8,13 +8,23 @@ package org.opensearch.index.codec.customcodec; -/** Custom codec for different compression algorithm */ +/** + * ZstdNoDictCodec provides ZSTD compressor without a dictionary support. + */ public class ZstdNoDictCodec extends Lucene92CustomCodec { + /** + * Creates a new ZstdNoDictCodec instance with the default compression level. + */ public ZstdNoDictCodec() { this(DEFAULT_COMPRESSION_LEVEL); } + /** + * Creates a new ZstdNoDictCodec instance. + * + * @param compressionLevel The compression level. + */ public ZstdNoDictCodec(int compressionLevel) { super(Mode.ZSTDNODICT, compressionLevel); } diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java index 680fba66eb5ec..7e9593dbb4dd0 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java @@ -18,7 +18,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; -/** Zstandard Compression Mode */ +/** ZSTD Compression Mode (without a dictionary support). 
*/ public class ZstdNoDictCompressionMode extends CompressionMode { private static final int NUM_SUB_BLOCKS = 10; @@ -31,7 +31,11 @@ protected ZstdNoDictCompressionMode() { this.compressionLevel = DEFAULT_COMPRESSION_LEVEL; } - /** compression mode for a given compression level */ + /** + * Creates a new instance with the given compression level. + * + * @param compressionLevel The compression level. + */ protected ZstdNoDictCompressionMode(int compressionLevel) { this.compressionLevel = compressionLevel; } From 8120dae215f706240a59f63f92475f037429e96f Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Thu, 7 Jul 2022 17:55:41 +0000 Subject: [PATCH 17/25] Fix lint errors. Signed-off-by: Mulugeta Mammo --- sandbox/modules/custom-codecs/build.gradle | 1 - sandbox/modules/custom-codecs/licenses/lz4-java-NOTICE.txt | 2 -- .../modules/custom-codecs/licenses/zstd-jni-1.5.2-3.jar.sha1 | 2 +- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/sandbox/modules/custom-codecs/build.gradle b/sandbox/modules/custom-codecs/build.gradle index 3307aa84bf8cd..34c2767cbca73 100644 --- a/sandbox/modules/custom-codecs/build.gradle +++ b/sandbox/modules/custom-codecs/build.gradle @@ -32,4 +32,3 @@ thirdPartyAudit.ignoreViolations( yamlRestTest.enabled = false; testingConventions.enabled = false; - diff --git a/sandbox/modules/custom-codecs/licenses/lz4-java-NOTICE.txt b/sandbox/modules/custom-codecs/licenses/lz4-java-NOTICE.txt index a9f0e2b890655..dabde5a433b3e 100644 --- a/sandbox/modules/custom-codecs/licenses/lz4-java-NOTICE.txt +++ b/sandbox/modules/custom-codecs/licenses/lz4-java-NOTICE.txt @@ -1,3 +1 @@ LZ4 compression for Java, based on Yann Collet's work available at http://code.google.com/p/lz4/. 
- - diff --git a/sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.2-3.jar.sha1 b/sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.2-3.jar.sha1 index a57e9470a9166..2101183e90a13 100644 --- a/sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.2-3.jar.sha1 +++ b/sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.2-3.jar.sha1 @@ -1 +1 @@ -f52de0603f31798455e48bd90e10a8f888dd6d93 \ No newline at end of file +f52de0603f31798455e48bd90e10a8f888dd6d93 From 28bd09912965f7e1abcb4eee5ccfdffd0edcfcbd Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Thu, 7 Jul 2022 21:27:54 +0000 Subject: [PATCH 18/25] Fix the description for the custom-codecs plugin. Signed-off-by: Mulugeta Mammo --- sandbox/modules/custom-codecs/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sandbox/modules/custom-codecs/build.gradle b/sandbox/modules/custom-codecs/build.gradle index 34c2767cbca73..cfc72e632b659 100644 --- a/sandbox/modules/custom-codecs/build.gradle +++ b/sandbox/modules/custom-codecs/build.gradle @@ -14,7 +14,7 @@ apply plugin: 'opensearch.yaml-rest-test' opensearchplugin { name 'custom-codecs' - description 'A plugin that implements zstd (with and without dictionary) and lz4 (native) compression algorithms.' + description 'A plugin that implements custom codecs, including zstd and lz4 (native).' classname 'org.opensearch.index.codec.customcodec.CustomCodecPlugin' licenseFile rootProject.file('licenses/APACHE-LICENSE-2.0.txt') noticeFile rootProject.file('NOTICE.txt') From 51b825b2c454d40dd1c71266fef51002f52954d8 Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Fri, 8 Jul 2022 19:24:55 +0000 Subject: [PATCH 19/25] Fix wildcard import and improve documentation. 
Signed-off-by: Mulugeta Mammo --- .../index/codec/customcodec/CustomCodecPlugin.java | 11 +++++++++-- .../index/codec/customcodec/Lucene92CustomCodec.java | 3 ++- .../index/codec/customcodec/ZstdCompressionMode.java | 6 +++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java index 0acea7fdd7446..33b371f6dfc28 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java @@ -16,9 +16,16 @@ import java.util.Optional; /** - * A plugin that implements zstd (with and without dictionary) and lz4 (native) compression algorithms. + * A plugin that implements custom codecs. Current custom codecs include: + *
<ul>
+ * <li>LZ4</li>
+ * <li>ZSTD</li>
+ * <li>ZSTDNODICT</li>
+ * </ul>
+ * + * @opensearch.internal */ -public class CustomCodecPlugin extends Plugin implements EnginePlugin { +public final class CustomCodecPlugin extends Plugin implements EnginePlugin { /** Creates a new instance */ public CustomCodecPlugin() {} diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java index c8fefadb6cac7..f4d3adc8f57f9 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java @@ -8,7 +8,8 @@ package org.opensearch.index.codec.customcodec; -import org.apache.lucene.codecs.*; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.lucene92.Lucene92Codec; abstract class Lucene92CustomCodec extends FilterCodec { diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java index d515fcb83f7d7..d66df95ae142c 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java @@ -8,7 +8,11 @@ package org.opensearch.index.codec.customcodec; -import com.github.luben.zstd.*; +import com.github.luben.zstd.Zstd; +import com.github.luben.zstd.ZstdCompressCtx; +import com.github.luben.zstd.ZstdDecompressCtx; +import com.github.luben.zstd.ZstdDictCompress; +import com.github.luben.zstd.ZstdDictDecompress; import java.io.IOException; import org.apache.lucene.codecs.compressing.CompressionMode; import 
org.apache.lucene.codecs.compressing.Compressor; From 67575369c95f40aac2fd0816fe7e4f360b3a66b1 Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Fri, 17 Feb 2023 08:38:52 -0800 Subject: [PATCH 20/25] Access control exception fixed. Removed lz4-java support for now. - PRs were made to zstd-jni and lz4-java to use AccessController.doPrivileged. - The zstd-jni PR is merged since version 1.5.4-1. - The lz4-java support temporarily removed until the PR gets merged. Signed-off-by: Mulugeta Mammo --- sandbox/modules/custom-codecs/build.gradle | 7 +- .../licenses/lz4-java-1.8.0.jar.sha1 | 1 - .../licenses/lz4-java-LICENSE.txt | 202 ------------------ .../licenses/lz4-java-NOTICE.txt | 1 - .../licenses/zstd-jni-1.5.2-3.jar.sha1 | 1 - .../licenses/zstd-jni-1.5.4-1.jar.sha1 | 1 + .../codec/customcodec/CustomCodecPlugin.java | 3 +- .../codec/customcodec/CustomCodecService.java | 11 +- .../customcodec/Lucene92CustomCodec.java | 3 +- .../Lucene92CustomStoredFieldsFormat.java | 20 +- .../index/codec/customcodec/Lz4Codec.java | 27 --- .../codec/customcodec/Lz4CompressionMode.java | 147 ------------- .../PerFieldMappingPostingFormatCodec.java | 2 +- .../index/codec/customcodec/package-info.java | 2 +- .../plugin-metadata/plugin-security.policy | 36 ++++ .../services/org.apache.lucene.codecs.Codec | 1 - .../codec/customcodecs/CustomCodecTests.java | 5 - .../org/opensearch/bootstrap/security.policy | 2 - 18 files changed, 50 insertions(+), 422 deletions(-) delete mode 100644 sandbox/modules/custom-codecs/licenses/lz4-java-1.8.0.jar.sha1 delete mode 100644 sandbox/modules/custom-codecs/licenses/lz4-java-LICENSE.txt delete mode 100644 sandbox/modules/custom-codecs/licenses/lz4-java-NOTICE.txt delete mode 100644 sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.2-3.jar.sha1 create mode 100644 sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.4-1.jar.sha1 delete mode 100644 sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java delete mode 
100644 sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4CompressionMode.java create mode 100644 sandbox/modules/custom-codecs/src/main/plugin-metadata/plugin-security.policy diff --git a/sandbox/modules/custom-codecs/build.gradle b/sandbox/modules/custom-codecs/build.gradle index cfc72e632b659..94b6f3d615b93 100644 --- a/sandbox/modules/custom-codecs/build.gradle +++ b/sandbox/modules/custom-codecs/build.gradle @@ -14,18 +14,17 @@ apply plugin: 'opensearch.yaml-rest-test' opensearchplugin { name 'custom-codecs' - description 'A plugin that implements custom codecs, including zstd and lz4 (native).' + description 'A plugin that implements custom compression codecs.' classname 'org.opensearch.index.codec.customcodec.CustomCodecPlugin' licenseFile rootProject.file('licenses/APACHE-LICENSE-2.0.txt') noticeFile rootProject.file('NOTICE.txt') } dependencies { - api "org.lz4:lz4-java:1.8.0" - api "com.github.luben:zstd-jni:1.5.2-3" + api "com.github.luben:zstd-jni:1.5.4-1" } -// lz4-java uses sun.misc.Unsafe in the UnsafeUtils class. +// Ignore sun.misc.Unsafe thirdPartyAudit.ignoreViolations( 'net.jpountz.util.UnsafeUtils' ) diff --git a/sandbox/modules/custom-codecs/licenses/lz4-java-1.8.0.jar.sha1 b/sandbox/modules/custom-codecs/licenses/lz4-java-1.8.0.jar.sha1 deleted file mode 100644 index 1aac31a3d4cb0..0000000000000 --- a/sandbox/modules/custom-codecs/licenses/lz4-java-1.8.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4b986a99445e49ea5fbf5d149c4b63f6ed6c6780 diff --git a/sandbox/modules/custom-codecs/licenses/lz4-java-LICENSE.txt b/sandbox/modules/custom-codecs/licenses/lz4-java-LICENSE.txt deleted file mode 100644 index d645695673349..0000000000000 --- a/sandbox/modules/custom-codecs/licenses/lz4-java-LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/sandbox/modules/custom-codecs/licenses/lz4-java-NOTICE.txt b/sandbox/modules/custom-codecs/licenses/lz4-java-NOTICE.txt deleted file mode 100644 index dabde5a433b3e..0000000000000 --- a/sandbox/modules/custom-codecs/licenses/lz4-java-NOTICE.txt +++ /dev/null @@ -1 +0,0 @@ -LZ4 compression for Java, based on Yann Collet's work available at http://code.google.com/p/lz4/. 
diff --git a/sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.2-3.jar.sha1 b/sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.2-3.jar.sha1 deleted file mode 100644 index 2101183e90a13..0000000000000 --- a/sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.2-3.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -f52de0603f31798455e48bd90e10a8f888dd6d93 diff --git a/sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.4-1.jar.sha1 b/sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.4-1.jar.sha1 new file mode 100644 index 0000000000000..e95377f702a6c --- /dev/null +++ b/sandbox/modules/custom-codecs/licenses/zstd-jni-1.5.4-1.jar.sha1 @@ -0,0 +1 @@ +291ccaacc039e41932de877303edb6af98a91c24 diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java index 33b371f6dfc28..034794826848a 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java @@ -16,9 +16,8 @@ import java.util.Optional; /** - * A plugin that implements custom codecs. Current custom codecs include: + * A plugin that implements custom codecs. Supports these codecs: *
    - *
  • LZ4 *
  • ZSTD *
  • ZSTDNODICT *
diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java index e2ded1dc4ef7c..ab69fa52292d0 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java @@ -19,7 +19,7 @@ import java.util.Map; /** - * CustomCodecService provides ZSTD, ZSTDNODICT, and LZ4 compressors to be used in use-case specific file formats. + * CustomCodecService provides ZSTD and ZSTDNODICT compression codecs. */ public class CustomCodecService extends CodecService { private final Map codecs; @@ -36,7 +36,6 @@ public CustomCodecService(MapperService mapperService, Logger logger) { if (mapperService == null) { codecs.put(Lucene92CustomCodec.Mode.ZSTD.name(), new ZstdCodec()); codecs.put(Lucene92CustomCodec.Mode.ZSTDNODICT.name(), new ZstdNoDictCodec()); - codecs.put(Lucene92CustomCodec.Mode.LZ4.name(), new Lz4Codec()); } else { codecs.put( Lucene92CustomCodec.Mode.ZSTD.name(), @@ -46,19 +45,15 @@ public CustomCodecService(MapperService mapperService, Logger logger) { Lucene92CustomCodec.Mode.ZSTDNODICT.name(), new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.ZSTDNODICT, mapperService) ); - codecs.put( - Lucene92CustomCodec.Mode.LZ4.name(), - new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.LZ4, mapperService) - ); } this.codecs = codecs.immutableMap(); } @Override public Codec codec(String name) { - Codec codec = super.codec(name); + Codec codec = codecs.get(name); if (codec == null) { - codec = codecs.get(name); + return super.codec(name); } return codec; } diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java 
b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java index f4d3adc8f57f9..24e38ebc599f1 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java @@ -18,8 +18,7 @@ abstract class Lucene92CustomCodec extends FilterCodec { /** Each mode represents a compression algorithm. */ public enum Mode { ZSTD, - ZSTDNODICT, - LZ4 + ZSTDNODICT } private final StoredFieldsFormat storedFieldsFormat; diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java index db6979e41e31e..870cf0f9e51fa 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java @@ -30,25 +30,20 @@ public class Lucene92CustomStoredFieldsFormat extends StoredFieldsFormat { private static final int ZSTD_MAX_DOCS_PER_BLOCK = 4096; private static final int ZSTD_BLOCK_SHIFT = 10; - private static final int LZ4_BLOCK_LENGTH = 10 * 8 * 1024; - private static final int LZ4_MAX_DOCS_PER_BLOCK = 1024; - private static final int LZ4_BLOCK_SHIFT = 10; - private final CompressionMode zstdCompressionMode; private final CompressionMode zstdNoDictCompressionMode; - private final CompressionMode lz4CompressionMode; private final Lucene92CustomCodec.Mode mode; /** default constructor */ public Lucene92CustomStoredFieldsFormat() { - this(Lucene92CustomCodec.Mode.LZ4, Lucene92CustomCodec.DEFAULT_COMPRESSION_LEVEL); + this(Lucene92CustomCodec.Mode.ZSTD, Lucene92CustomCodec.DEFAULT_COMPRESSION_LEVEL); } /** * 
Creates a new instance. * - * @param mode The mode represents ZSTD, ZSTDNODICT, or LZ4. + * @param mode The mode represents ZSTD or ZSTDNODICT */ public Lucene92CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode) { this(mode, Lucene92CustomCodec.DEFAULT_COMPRESSION_LEVEL); @@ -57,14 +52,13 @@ public Lucene92CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode) { /** * Creates a new instance with the specified mode and compression level. * - * @param mode The mode represents ZSTD, ZSTDNODICT, or LZ4. + * @param mode The mode represents ZSTD or ZSTDNODICT * @param compressionLevel The compression level for the mode. */ public Lucene92CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode, int compressionLevel) { this.mode = Objects.requireNonNull(mode); zstdCompressionMode = new ZstdCompressionMode(compressionLevel); zstdNoDictCompressionMode = new ZstdNoDictCompressionMode(compressionLevel); - lz4CompressionMode = new Lz4CompressionMode(); } @Override @@ -106,14 +100,6 @@ private StoredFieldsFormat impl(Lucene92CustomCodec.Mode mode) { ZSTD_MAX_DOCS_PER_BLOCK, ZSTD_BLOCK_SHIFT ); - case LZ4: - return new Lucene90CompressingStoredFieldsFormat( - "CustomStoredFieldsLz4", - lz4CompressionMode, - LZ4_BLOCK_LENGTH, - LZ4_MAX_DOCS_PER_BLOCK, - LZ4_BLOCK_SHIFT - ); default: throw new AssertionError(); } diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java deleted file mode 100644 index db7f71f1a0846..0000000000000 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4Codec.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.index.codec.customcodec; - -/** - * Lz4Codec provides a native LZ4 implementation based on the lz4-java library. - */ -public class Lz4Codec extends Lucene92CustomCodec { - - /** - * Creates a new Lz4Codec instance. - */ - public Lz4Codec() { - super(Mode.LZ4); - } - - @Override - public String toString() { - return getClass().getSimpleName(); - } -} diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4CompressionMode.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4CompressionMode.java deleted file mode 100644 index c0d9395db990b..0000000000000 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lz4CompressionMode.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.index.codec.customcodec; - -import java.io.IOException; -import net.jpountz.lz4.LZ4Compressor; -import net.jpountz.lz4.LZ4Factory; -import net.jpountz.lz4.LZ4FastDecompressor; -import org.apache.lucene.codecs.compressing.CompressionMode; -import org.apache.lucene.codecs.compressing.Compressor; -import org.apache.lucene.codecs.compressing.Decompressor; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.DataOutput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; - -/** LZ4 JNI based Compression Mode */ -public class Lz4CompressionMode extends CompressionMode { - - private static final int NUM_SUB_BLOCKS = 10; - - /** default constructor */ - protected Lz4CompressionMode() {} - - @Override - public Compressor newCompressor() { - return new Lz4CompressionMode.LZ4InnerCompressor(); - } - - @Override - public Decompressor newDecompressor() { - return new Lz4CompressionMode.LZ4InnerDecompressor(); - } - - /** LZ4 compressor */ - private static final class LZ4InnerCompressor extends Compressor { - private byte[] compressedBuffer; - private final LZ4Compressor compressor; - - /** Default constructor */ - public LZ4InnerCompressor() { - compressedBuffer = BytesRef.EMPTY_BYTES; - compressor = LZ4Factory.nativeInstance().fastCompressor(); - } - - @Override - public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException { - int blockLength = (len + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS; - out.writeVInt(blockLength); - - final int end = off + len; - - for (int start = off; start < end; start += blockLength) { - int l = Math.min(blockLength, off + len - start); - - if (l == 0) { - out.writeVInt(0); - return; - } - - final int maxCompressedLength = compressor.maxCompressedLength(l); - compressedBuffer = ArrayUtil.grow(compressedBuffer, maxCompressedLength); - - int compressedSize = compressor.compress(bytes, start, l, compressedBuffer, 0, compressedBuffer.length); - - 
out.writeVInt(compressedSize); - out.writeBytes(compressedBuffer, compressedSize); - } - } - - @Override - public void close() throws IOException {} - } - - /** LZ4 decompressor */ - private static final class LZ4InnerDecompressor extends Decompressor { - - private byte[] compressedBuffer; - private final LZ4FastDecompressor decompressor; - - /** default decompressor */ - public LZ4InnerDecompressor() { - compressedBuffer = BytesRef.EMPTY_BYTES; - decompressor = LZ4Factory.nativeInstance().fastDecompressor(); - } - - @Override - public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException { - assert offset + length <= originalLength; - - if (length == 0) { - bytes.length = 0; - return; - } - - final int blockLength = in.readVInt(); - bytes.offset = bytes.length = 0; - int offsetInBlock = 0; - int offsetInBytesRef = offset; - - // Skip unneeded blocks - while (offsetInBlock + blockLength < offset) { - final int compressedLength = in.readVInt(); - in.skipBytes(compressedLength); - offsetInBlock += blockLength; - offsetInBytesRef -= blockLength; - } - - // Read blocks that intersect with the interval we need - while (offsetInBlock < offset + length) { - bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + blockLength); - final int compressedLength = in.readVInt(); - if (compressedLength == 0) { - return; - } - compressedBuffer = ArrayUtil.grow(compressedBuffer, compressedLength); - in.readBytes(compressedBuffer, 0, compressedLength); - - int l = Math.min(blockLength, originalLength - offsetInBlock); - bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + l); - - byte[] output = new byte[l]; - - decompressor.decompress(compressedBuffer, 0, output, 0, l); - System.arraycopy(output, 0, bytes.bytes, bytes.length, l); - - bytes.length += l; - offsetInBlock += blockLength; - } - - bytes.offset = offsetInBytesRef; - bytes.length = length; - assert bytes.isValid(); - } - - @Override - public Decompressor clone() { 
- return new LZ4InnerDecompressor(); - } - } -} diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java index 346ed776acd0e..96d71b06dd4c4 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java @@ -16,7 +16,7 @@ public class PerFieldMappingPostingFormatCodec extends Lucene92CustomCodec { /** * Creates a new instance. * - * @param compressionMode The compression mode (ZSTD, ZSTDNODICT, or LZ4). + * @param compressionMode The compression mode (ZSTD or ZSTDNODICT). * @param mapperService The mapper service. */ public PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode compressionMode, MapperService mapperService) { diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/package-info.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/package-info.java index 943a814b25c2f..8eecebb76e665 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/package-info.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/package-info.java @@ -7,6 +7,6 @@ */ /** - * A plugin that implements native codecs. + * A plugin that implements compression codecs with native implementation. 
*/ package org.opensearch.index.codec.customcodec; diff --git a/sandbox/modules/custom-codecs/src/main/plugin-metadata/plugin-security.policy b/sandbox/modules/custom-codecs/src/main/plugin-metadata/plugin-security.policy new file mode 100644 index 0000000000000..549faea299dec --- /dev/null +++ b/sandbox/modules/custom-codecs/src/main/plugin-metadata/plugin-security.policy @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +grant codeBase "${codebase.zstd-jni}" { + permission java.lang.RuntimePermission "loadLibrary.*"; +}; + diff --git a/sandbox/modules/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/sandbox/modules/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec index 4df6c1db0637d..4984c9bba7dc6 100644 --- a/sandbox/modules/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/sandbox/modules/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -1,3 +1,2 @@ -org.opensearch.index.codec.customcodec.Lz4Codec org.opensearch.index.codec.customcodec.ZstdCodec org.opensearch.index.codec.customcodec.ZstdNoDictCodec diff --git a/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java index 30d8625eea49b..d43b5c4cef591 100644 --- a/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java +++ b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java @@ -45,11 +45,6 @@ public void testZstdNoDictCompression() throws Exception { assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.ZSTDNODICT, codec); } - public void testLz4NativeCompression() throws Exception { - Codec codec = createCodecService().codec("LZ4"); - assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.LZ4, codec); - } - private void assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode expected, Codec actual) throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(null); diff --git a/server/src/main/resources/org/opensearch/bootstrap/security.policy b/server/src/main/resources/org/opensearch/bootstrap/security.policy index 06767166e88d5..3671782b9d12f 100644 
--- a/server/src/main/resources/org/opensearch/bootstrap/security.policy +++ b/server/src/main/resources/org/opensearch/bootstrap/security.policy @@ -182,6 +182,4 @@ grant { permission java.io.FilePermission "/sys/fs/cgroup/memory", "read"; permission java.io.FilePermission "/sys/fs/cgroup/memory/-", "read"; - // needed by zstd-jni and lz4-java - permission java.lang.RuntimePermission "loadLibrary.*"; }; From 45cd6038bd7141d1b5dd2fd122c39fee5e74663a Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Wed, 22 Feb 2023 14:17:33 -0700 Subject: [PATCH 21/25] Upgrade plugin to use Lucene95Codec. Rename files accordingly. - Upgrade plugin to use Lucene95Codec. Rename files accordingly. - Fix lint issue with plugin-security. - Remove thirdPartyAudit that was there for supporting lz4-java. Signed-off-by: Mulugeta Mammo --- sandbox/modules/custom-codecs/build.gradle | 5 ----- .../codec/customcodec/CustomCodecService.java | 12 +++++------ ...tomCodec.java => Lucene95CustomCodec.java} | 12 +++++------ ...
=> Lucene95CustomStoredFieldsFormat.java} | 20 +++++++++---------- .../PerFieldMappingPostingFormatCodec.java | 4 ++-- .../index/codec/customcodec/ZstdCodec.java | 2 +- .../customcodec/ZstdCompressionMode.java | 13 ++++++++++-- .../codec/customcodec/ZstdNoDictCodec.java | 2 +- .../ZstdNoDictCompressionMode.java | 12 +++++++++-- .../plugin-metadata/plugin-security.policy | 1 - .../codec/customcodecs/CustomCodecTests.java | 10 +++++----- 11 files changed, 52 insertions(+), 41 deletions(-) rename sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/{Lucene92CustomCodec.java => Lucene95CustomCodec.java} (74%) rename sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/{Lucene92CustomStoredFieldsFormat.java => Lucene95CustomStoredFieldsFormat.java} (85%) diff --git a/sandbox/modules/custom-codecs/build.gradle b/sandbox/modules/custom-codecs/build.gradle index 94b6f3d615b93..5f4f724102aee 100644 --- a/sandbox/modules/custom-codecs/build.gradle +++ b/sandbox/modules/custom-codecs/build.gradle @@ -24,10 +24,5 @@ dependencies { api "com.github.luben:zstd-jni:1.5.4-1" } -// Ignore sun.misc.Unsafe -thirdPartyAudit.ignoreViolations( - 'net.jpountz.util.UnsafeUtils' -) - yamlRestTest.enabled = false; testingConventions.enabled = false; diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java index ab69fa52292d0..2c40adaeaa76b 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java @@ -34,16 +34,16 @@ public CustomCodecService(MapperService mapperService, Logger logger) { super(mapperService, logger); final MapBuilder codecs = MapBuilder.newMapBuilder(); if (mapperService == 
null) { - codecs.put(Lucene92CustomCodec.Mode.ZSTD.name(), new ZstdCodec()); - codecs.put(Lucene92CustomCodec.Mode.ZSTDNODICT.name(), new ZstdNoDictCodec()); + codecs.put(Lucene95CustomCodec.Mode.ZSTD.name(), new ZstdCodec()); + codecs.put(Lucene95CustomCodec.Mode.ZSTDNODICT.name(), new ZstdNoDictCodec()); } else { codecs.put( - Lucene92CustomCodec.Mode.ZSTD.name(), - new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.ZSTD, mapperService) + Lucene95CustomCodec.Mode.ZSTD.name(), + new PerFieldMappingPostingFormatCodec(Lucene95CustomCodec.Mode.ZSTD, mapperService) ); codecs.put( - Lucene92CustomCodec.Mode.ZSTDNODICT.name(), - new PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode.ZSTDNODICT, mapperService) + Lucene95CustomCodec.Mode.ZSTDNODICT.name(), + new PerFieldMappingPostingFormatCodec(Lucene95CustomCodec.Mode.ZSTDNODICT, mapperService) ); } this.codecs = codecs.immutableMap(); diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene95CustomCodec.java similarity index 74% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene95CustomCodec.java index 24e38ebc599f1..9c44a04fb4f05 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene95CustomCodec.java @@ -10,9 +10,9 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.FilterCodec; -import org.apache.lucene.codecs.lucene92.Lucene92Codec; +import org.apache.lucene.codecs.lucene95.Lucene95Codec; -abstract class Lucene92CustomCodec extends FilterCodec { +abstract class Lucene95CustomCodec 
extends FilterCodec { public static final int DEFAULT_COMPRESSION_LEVEL = 6; /** Each mode represents a compression algorithm. */ @@ -24,13 +24,13 @@ public enum Mode { private final StoredFieldsFormat storedFieldsFormat; /** new codec for a given compression algorithm and default compression level */ - public Lucene92CustomCodec(Mode mode) { + public Lucene95CustomCodec(Mode mode) { this(mode, DEFAULT_COMPRESSION_LEVEL); } - public Lucene92CustomCodec(Mode mode, int compressionLevel) { - super(mode.name(), new Lucene92Codec()); - this.storedFieldsFormat = new Lucene92CustomStoredFieldsFormat(mode, compressionLevel); + public Lucene95CustomCodec(Mode mode, int compressionLevel) { + super(mode.name(), new Lucene95Codec()); + this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel); } @Override diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene95CustomStoredFieldsFormat.java similarity index 85% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene95CustomStoredFieldsFormat.java index 870cf0f9e51fa..709f930978d20 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene92CustomStoredFieldsFormat.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene95CustomStoredFieldsFormat.java @@ -21,10 +21,10 @@ import org.apache.lucene.store.IOContext; /** Stored field format used by pluggable codec */ -public class Lucene92CustomStoredFieldsFormat extends StoredFieldsFormat { +public class Lucene95CustomStoredFieldsFormat extends StoredFieldsFormat { /** A key that we use to map to a mode */ - public static 
final String MODE_KEY = Lucene92CustomStoredFieldsFormat.class.getSimpleName() + ".mode"; + public static final String MODE_KEY = Lucene95CustomStoredFieldsFormat.class.getSimpleName() + ".mode"; private static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024; private static final int ZSTD_MAX_DOCS_PER_BLOCK = 4096; @@ -33,11 +33,11 @@ public class Lucene92CustomStoredFieldsFormat extends StoredFieldsFormat { private final CompressionMode zstdCompressionMode; private final CompressionMode zstdNoDictCompressionMode; - private final Lucene92CustomCodec.Mode mode; + private final Lucene95CustomCodec.Mode mode; /** default constructor */ - public Lucene92CustomStoredFieldsFormat() { - this(Lucene92CustomCodec.Mode.ZSTD, Lucene92CustomCodec.DEFAULT_COMPRESSION_LEVEL); + public Lucene95CustomStoredFieldsFormat() { + this(Lucene95CustomCodec.Mode.ZSTD, Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL); } /** @@ -45,8 +45,8 @@ public Lucene92CustomStoredFieldsFormat() { * * @param mode The mode represents ZSTD or ZSTDNODICT */ - public Lucene92CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode) { - this(mode, Lucene92CustomCodec.DEFAULT_COMPRESSION_LEVEL); + public Lucene95CustomStoredFieldsFormat(Lucene95CustomCodec.Mode mode) { + this(mode, Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL); } /** @@ -55,7 +55,7 @@ public Lucene92CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode) { * @param mode The mode represents ZSTD or ZSTDNODICT * @param compressionLevel The compression level for the mode. 
*/ - public Lucene92CustomStoredFieldsFormat(Lucene92CustomCodec.Mode mode, int compressionLevel) { + public Lucene95CustomStoredFieldsFormat(Lucene95CustomCodec.Mode mode, int compressionLevel) { this.mode = Objects.requireNonNull(mode); zstdCompressionMode = new ZstdCompressionMode(compressionLevel); zstdNoDictCompressionMode = new ZstdNoDictCompressionMode(compressionLevel); @@ -67,7 +67,7 @@ public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, Fiel if (value == null) { throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name); } - Lucene92CustomCodec.Mode mode = Lucene92CustomCodec.Mode.valueOf(value); + Lucene95CustomCodec.Mode mode = Lucene95CustomCodec.Mode.valueOf(value); return impl(mode).fieldsReader(directory, si, fn, context); } @@ -82,7 +82,7 @@ public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOCo return impl(mode).fieldsWriter(directory, si, context); } - private StoredFieldsFormat impl(Lucene92CustomCodec.Mode mode) { + private StoredFieldsFormat impl(Lucene95CustomCodec.Mode mode) { switch (mode) { case ZSTD: return new Lucene90CompressingStoredFieldsFormat( diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java index 96d71b06dd4c4..d298a827693c6 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java @@ -11,7 +11,7 @@ import org.opensearch.index.mapper.MapperService; /** PerFieldMappingPostingFormatCodec. 
{@link org.opensearch.index.codec.PerFieldMappingPostingFormatCodec} */ -public class PerFieldMappingPostingFormatCodec extends Lucene92CustomCodec { +public class PerFieldMappingPostingFormatCodec extends Lucene95CustomCodec { /** * Creates a new instance. @@ -19,7 +19,7 @@ public class PerFieldMappingPostingFormatCodec extends Lucene92CustomCodec { * @param compressionMode The compression mode (ZSTD or ZSTDNODICT). * @param mapperService The mapper service. */ - public PerFieldMappingPostingFormatCodec(Lucene92CustomCodec.Mode compressionMode, MapperService mapperService) { + public PerFieldMappingPostingFormatCodec(Lucene95CustomCodec.Mode compressionMode, MapperService mapperService) { super(compressionMode); } } diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java index cc3f7ad4792f7..fd5712dd570b7 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java @@ -11,7 +11,7 @@ /** * ZstdCodec provides ZSTD compressor using the zstd-jni library. */ -public class ZstdCodec extends Lucene92CustomCodec { +public class ZstdCodec extends Lucene95CustomCodec { /** * Creates a new ZstdCodec instance with the default compression level. 
diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java index d66df95ae142c..64aa5a1a4bb9a 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java @@ -19,6 +19,7 @@ import org.apache.lucene.codecs.compressing.Decompressor; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -85,8 +86,7 @@ private void doCompress(byte[] bytes, int off, int len, ZstdCompressCtx cctx, Da out.writeBytes(compressedBuffer, compressedSize); } - @Override - public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException { + private void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException { final int dictLength = len / (NUM_SUB_BLOCKS * DICT_SIZE_FACTOR); final int blockLength = (len - dictLength + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS; out.writeVInt(dictLength); @@ -107,6 +107,15 @@ public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOEx } } } + + @Override + public void compress(ByteBuffersDataInput buffersInput, DataOutput out) throws IOException { + final int len = (int) buffersInput.size(); + byte[] bytes = new byte[len]; + buffersInput.readBytes(bytes, 0, len); + compress(bytes, 0, len, out); + } + } /** zstandard decompressor */ diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java index d0c1e08495a28..dce1813fddda1 100644 --- 
a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java @@ -11,7 +11,7 @@ /** * ZstdNoDictCodec provides ZSTD compressor without a dictionary support. */ -public class ZstdNoDictCodec extends Lucene92CustomCodec { +public class ZstdNoDictCodec extends Lucene95CustomCodec { /** * Creates a new ZstdNoDictCodec instance with the default compression level. diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java index 7e9593dbb4dd0..78e4aeab244b4 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java @@ -15,6 +15,7 @@ import org.apache.lucene.codecs.compressing.Decompressor; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -65,8 +66,7 @@ public ZSTDCompressor(int compressionLevel) { @Override public void close() throws IOException {} - @Override - public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException { + private void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException { int blockLength = (len + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS; out.writeVInt(blockLength); @@ -98,6 +98,14 @@ public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOEx out.writeBytes(compressedBuffer, compressedSize); } } + + @Override + public void compress(ByteBuffersDataInput buffersInput, DataOutput out) throws 
IOException { + final int len = (int) buffersInput.size(); + byte[] bytes = new byte[len]; + buffersInput.readBytes(bytes, 0, len); + compress(bytes, 0, len, out); + } } /** zstandard decompressor */ diff --git a/sandbox/modules/custom-codecs/src/main/plugin-metadata/plugin-security.policy b/sandbox/modules/custom-codecs/src/main/plugin-metadata/plugin-security.policy index 549faea299dec..737ccb0e939f3 100644 --- a/sandbox/modules/custom-codecs/src/main/plugin-metadata/plugin-security.policy +++ b/sandbox/modules/custom-codecs/src/main/plugin-metadata/plugin-security.policy @@ -33,4 +33,3 @@ grant codeBase "${codebase.zstd-jni}" { permission java.lang.RuntimePermission "loadLibrary.*"; }; - diff --git a/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java index d43b5c4cef591..6833b77ce890a 100644 --- a/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java +++ b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java @@ -37,15 +37,15 @@ public class CustomCodecTests extends OpenSearchTestCase { public void testZstdCompression() throws Exception { Codec codec = createCodecService().codec("ZSTD"); - assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.ZSTD, codec); + assertStoredFieldsCustomCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); } public void testZstdNoDictCompression() throws Exception { Codec codec = createCodecService().codec("ZSTDNODICT"); - assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode.ZSTDNODICT, codec); + assertStoredFieldsCustomCompressionEquals(Lucene95CustomCodec.Mode.ZSTDNODICT, codec); } - private void assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode expected, Codec actual) throws Exception { + private void 
assertStoredFieldsCustomCompressionEquals(Lucene95CustomCodec.Mode expected, Codec actual) throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(null); iwc.setCodec(actual); @@ -55,9 +55,9 @@ private void assertStoredFieldsCustomCompressionEquals(Lucene92CustomCodec.Mode iw.close(); DirectoryReader ir = DirectoryReader.open(dir); SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader(); - String v = sr.getSegmentInfo().info.getAttribute(Lucene92CustomStoredFieldsFormat.MODE_KEY); + String v = sr.getSegmentInfo().info.getAttribute(Lucene95CustomStoredFieldsFormat.MODE_KEY); assertNotNull(v); - assertEquals(expected, Lucene92CustomCodec.Mode.valueOf(v)); + assertEquals(expected, Lucene95CustomCodec.Mode.valueOf(v)); ir.close(); dir.close(); } From f39d093e8278a95c284970eb3d170c6c5ccb068d Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Fri, 17 Mar 2023 10:07:46 -0700 Subject: [PATCH 22/25] Add test cases for compression/decompression. Other minor changes. - add test cases for compression/decompression. - rename package. - add a CHANGELOG entry. - add more checks for signed integer arithmetic. 
Signed-off-by: Mulugeta Mammo --- CHANGELOG.md | 4 +- sandbox/modules/custom-codecs/build.gradle | 2 +- .../CustomCodecPlugin.java | 2 +- .../CustomCodecService.java | 2 +- .../CustomCodecServiceFactory.java | 2 +- .../Lucene95CustomCodec.java | 2 +- .../Lucene95CustomStoredFieldsFormat.java | 2 +- .../PerFieldMappingPostingFormatCodec.java | 2 +- .../ZstdCodec.java | 2 +- .../ZstdCompressionMode.java | 63 ++--- .../ZstdNoDictCodec.java | 2 +- .../ZstdNoDictCompressionMode.java | 49 ++-- .../package-info.java | 2 +- .../services/org.apache.lucene.codecs.Codec | 4 +- .../customcodecs/AbstractCompressorTests.java | 219 ++++++++++++++++++ .../codec/customcodecs/CustomCodecTests.java | 83 ------- .../customcodecs/ZstdCompressorTests.java | 30 +++ .../ZstdNoDictCompressorTests.java | 30 +++ 18 files changed, 353 insertions(+), 149 deletions(-) rename sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/{customcodec => customcodecs}/CustomCodecPlugin.java (95%) rename sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/{customcodec => customcodecs}/CustomCodecService.java (97%) rename sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/{customcodec => customcodecs}/CustomCodecServiceFactory.java (93%) rename sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/{customcodec => customcodecs}/Lucene95CustomCodec.java (96%) rename sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/{customcodec => customcodecs}/Lucene95CustomStoredFieldsFormat.java (98%) rename sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/{customcodec => customcodecs}/PerFieldMappingPostingFormatCodec.java (94%) rename sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/{customcodec => customcodecs}/ZstdCodec.java (94%) rename sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/{customcodec => customcodecs}/ZstdCompressionMode.java (75%) rename 
sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/{customcodec => customcodecs}/ZstdNoDictCodec.java (94%) rename sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/{customcodec => customcodecs}/ZstdNoDictCompressionMode.java (78%) rename sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/{customcodec => customcodecs}/package-info.java (85%) create mode 100644 sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java delete mode 100644 sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java create mode 100644 sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java create mode 100644 sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b2c379f486a0..1ab8181bcffd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -81,6 +81,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Return success on DeletePits when no PITs exist. ([#6544](https://github.com/opensearch-project/OpenSearch/pull/6544)) - Add node repurpose command for search nodes ([#6517](https://github.com/opensearch-project/OpenSearch/pull/6517)) - [Segment Replication] Apply backpressure when replicas fall behind ([#6563](https://github.com/opensearch-project/OpenSearch/pull/6563)) +- Add experimental support for ZSTD compression. 
([#3577](https://github.com/opensearch-project/OpenSearch/pull/3577)) ### Dependencies - Bump `org.apache.logging.log4j:log4j-core` from 2.18.0 to 2.20.0 ([#6490](https://github.com/opensearch-project/OpenSearch/pull/6490)) @@ -91,6 +92,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `net.minidev:json-smart` from 2.4.8 to 2.4.9 - Bump `com.google.protobuf:protobuf-java` from 3.22.0 to 3.22.2 - Bump Netty to 4.1.90.Final ([#6677](https://github.com/opensearch-project/OpenSearch/pull/6677) +- Add `com.github.luben:zstd-jni:1.5.4-1` ([#3577](https://github.com/opensearch-project/OpenSearch/pull/3577)) ### Changed - Require MediaType in Strings.toString API ([#6009](https://github.com/opensearch-project/OpenSearch/pull/6009)) @@ -110,4 +112,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Security [Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.x...HEAD -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.5...2.x \ No newline at end of file +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.5...2.x diff --git a/sandbox/modules/custom-codecs/build.gradle b/sandbox/modules/custom-codecs/build.gradle index 5f4f724102aee..bf1bc719b0ae6 100644 --- a/sandbox/modules/custom-codecs/build.gradle +++ b/sandbox/modules/custom-codecs/build.gradle @@ -15,7 +15,7 @@ apply plugin: 'opensearch.yaml-rest-test' opensearchplugin { name 'custom-codecs' description 'A plugin that implements custom compression codecs.' 
- classname 'org.opensearch.index.codec.customcodec.CustomCodecPlugin' + classname 'org.opensearch.index.codec.customcodecs.CustomCodecPlugin' licenseFile rootProject.file('licenses/APACHE-LICENSE-2.0.txt') noticeFile rootProject.file('NOTICE.txt') } diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java similarity index 95% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java index 034794826848a..1e0245f3c8c6b 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecPlugin.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.codec.customcodec; +package org.opensearch.index.codec.customcodecs; import org.opensearch.plugins.Plugin; import org.opensearch.plugins.EnginePlugin; diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecService.java similarity index 97% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecService.java index 2c40adaeaa76b..4dd25caa86d94 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecService.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecService.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.codec.customcodec; +package org.opensearch.index.codec.customcodecs; import org.apache.logging.log4j.Logger; import org.apache.lucene.codecs.Codec; diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecServiceFactory.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecServiceFactory.java similarity index 93% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecServiceFactory.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecServiceFactory.java index 135daa7519565..9a1872abfcbd7 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/CustomCodecServiceFactory.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecServiceFactory.java @@ -6,7 +6,7 @@ * compatible 
open source license. */ -package org.opensearch.index.codec.customcodec; +package org.opensearch.index.codec.customcodecs; import org.opensearch.index.codec.CodecService; import org.opensearch.index.codec.CodecServiceConfig; diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene95CustomCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java similarity index 96% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene95CustomCodec.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java index 9c44a04fb4f05..652306e59559b 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene95CustomCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.codec.customcodec; +package org.opensearch.index.codec.customcodecs; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.FilterCodec; diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene95CustomStoredFieldsFormat.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java similarity index 98% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene95CustomStoredFieldsFormat.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java index 709f930978d20..e0253516b6d0a 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/Lucene95CustomStoredFieldsFormat.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.codec.customcodec; +package org.opensearch.index.codec.customcodecs; import java.io.IOException; import java.util.Objects; diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/PerFieldMappingPostingFormatCodec.java similarity index 94% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/PerFieldMappingPostingFormatCodec.java index d298a827693c6..f1c64853bca40 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/PerFieldMappingPostingFormatCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/PerFieldMappingPostingFormatCodec.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.codec.customcodec; +package org.opensearch.index.codec.customcodecs; import org.opensearch.index.mapper.MapperService; diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java similarity index 94% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java index fd5712dd570b7..086e2461b1f6a 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.codec.customcodec; +package org.opensearch.index.codec.customcodecs; /** * ZstdCodec provides ZSTD compressor using the zstd-jni library. diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java similarity index 75% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java index 64aa5a1a4bb9a..795ddf3ab2d17 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdCompressionMode.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.codec.customcodec; +package org.opensearch.index.codec.customcodecs; import com.github.luben.zstd.Zstd; import com.github.luben.zstd.ZstdCompressCtx; @@ -48,61 +48,61 @@ protected ZstdCompressionMode(int compressionLevel) { @Override public Compressor newCompressor() { - return new ZSTDCompressor(compressionLevel); + return new ZstdCompressor(compressionLevel); } @Override public Decompressor newDecompressor() { - return new ZSTDDecompressor(); + return new ZstdDecompressor(); } /** zstandard compressor */ - private static final class ZSTDCompressor extends Compressor { + private static final class ZstdCompressor extends Compressor { private final int compressionLevel; private byte[] compressedBuffer; /** compressor with a given compresion level */ - public ZSTDCompressor(int compressionLevel) { + public ZstdCompressor(int compressionLevel) { this.compressionLevel = compressionLevel; compressedBuffer = BytesRef.EMPTY_BYTES; } - @Override - public void close() throws 
IOException {} - /*resuable compress function*/ - private void doCompress(byte[] bytes, int off, int len, ZstdCompressCtx cctx, DataOutput out) throws IOException { - if (len == 0) { + private void doCompress(byte[] bytes, int offset, int length, ZstdCompressCtx cctx, DataOutput out) throws IOException { + if (length == 0) { out.writeVInt(0); return; } - final int maxCompressedLength = (int) Zstd.compressBound(len); + final int maxCompressedLength = (int) Zstd.compressBound(length); compressedBuffer = ArrayUtil.grow(compressedBuffer, maxCompressedLength); - int compressedSize = cctx.compressByteArray(compressedBuffer, 0, compressedBuffer.length, bytes, off, len); + int compressedSize = cctx.compressByteArray(compressedBuffer, 0, compressedBuffer.length, bytes, offset, length); out.writeVInt(compressedSize); out.writeBytes(compressedBuffer, compressedSize); } - private void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException { - final int dictLength = len / (NUM_SUB_BLOCKS * DICT_SIZE_FACTOR); - final int blockLength = (len - dictLength + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS; + private void compress(byte[] bytes, int offset, int length, DataOutput out) throws IOException { + assert offset >= 0 : "offset value must be greater than 0"; + + final int dictLength = length / (NUM_SUB_BLOCKS * DICT_SIZE_FACTOR); + final int blockLength = (length - dictLength + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS; out.writeVInt(dictLength); out.writeVInt(blockLength); - final int end = off + len; + final int end = offset + length; + assert end >= 0 : "buffer read size must be greater than 0"; try (ZstdCompressCtx cctx = new ZstdCompressCtx()) { - cctx.setLevel(this.compressionLevel); + cctx.setLevel(compressionLevel); // dictionary compression first - doCompress(bytes, off, dictLength, cctx, out); - cctx.loadDict(new ZstdDictCompress(bytes, off, dictLength, this.compressionLevel)); + doCompress(bytes, offset, dictLength, cctx, out); + cctx.loadDict(new 
ZstdDictCompress(bytes, offset, dictLength, compressionLevel)); - for (int start = off + dictLength; start < end; start += blockLength) { - int l = Math.min(blockLength, off + len - start); + for (int start = offset + dictLength; start < end; start += blockLength) { + int l = Math.min(blockLength, end - start); doCompress(bytes, start, l, cctx, out); } } @@ -110,21 +110,23 @@ private void compress(byte[] bytes, int off, int len, DataOutput out) throws IOE @Override public void compress(ByteBuffersDataInput buffersInput, DataOutput out) throws IOException { - final int len = (int) buffersInput.size(); - byte[] bytes = new byte[len]; - buffersInput.readBytes(bytes, 0, len); - compress(bytes, 0, len, out); + final int length = (int) buffersInput.size(); + byte[] bytes = new byte[length]; + buffersInput.readBytes(bytes, 0, length); + compress(bytes, 0, length, out); } + @Override + public void close() throws IOException {} } /** zstandard decompressor */ - private static final class ZSTDDecompressor extends Decompressor { + private static final class ZstdDecompressor extends Decompressor { private byte[] compressedBuffer; /** default decompressor */ - public ZSTDDecompressor() { + public ZstdDecompressor() { compressedBuffer = BytesRef.EMPTY_BYTES; } @@ -149,7 +151,7 @@ private void doDecompress(DataInput in, ZstdDecompressCtx dctx, BytesRef bytes, @Override public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException { - assert offset + length <= originalLength; + assert offset + length <= originalLength : "buffer read size must be within limit"; if (length == 0) { bytes.length = 0; @@ -188,13 +190,14 @@ public void decompress(DataInput in, int originalLength, int offset, int length, bytes.offset = offsetInBytesRef; bytes.length = length; - assert bytes.isValid(); + + assert bytes.isValid() : "decompression output is corrupted"; } } @Override public Decompressor clone() { - return new ZSTDDecompressor(); + return 
new ZstdDecompressor(); } } } diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java similarity index 94% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java index dce1813fddda1..c33ca1f4ff6e7 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCodec.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.codec.customcodec; +package org.opensearch.index.codec.customcodecs; /** * ZstdNoDictCodec provides ZSTD compressor without a dictionary support. diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java similarity index 78% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java rename to sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java index 78e4aeab244b4..61808191556f0 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/ZstdNoDictCompressionMode.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.codec.customcodec; +package org.opensearch.index.codec.customcodecs; import com.github.luben.zstd.Zstd; import java.io.IOException; @@ -43,38 +43,37 @@ protected ZstdNoDictCompressionMode(int compressionLevel) { @Override public Compressor newCompressor() { - return new ZSTDCompressor(compressionLevel); + return new ZstdCompressor(compressionLevel); } @Override public Decompressor newDecompressor() { - return new ZSTDDecompressor(); + return new ZstdDecompressor(); } /** zstandard compressor */ - private static final class ZSTDCompressor extends Compressor { + private static final class ZstdCompressor extends Compressor { private final int compressionLevel; private byte[] compressedBuffer; /** compressor with a given compresion level */ - public ZSTDCompressor(int compressionLevel) { + public ZstdCompressor(int compressionLevel) { this.compressionLevel = compressionLevel; compressedBuffer = BytesRef.EMPTY_BYTES; } - @Override - public void close() throws IOException {} + private void compress(byte[] bytes, int offset, int length, DataOutput out) throws IOException { + assert offset >= 0 : "offset value must be greater than 0"; - private void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException { - - int blockLength = (len + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS; + int blockLength = (length + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS; out.writeVInt(blockLength); - final int end = off + len; + final int end = offset + length; + assert end >= 0 : "buffer read size must be greater than 0"; - for (int start = off; start < end; start += blockLength) { - int l = Math.min(blockLength, off + len - start); + for (int start = offset; start < end; start += blockLength) { + int l = Math.min(blockLength, end - start); if (l == 0) { out.writeVInt(0); @@ -91,7 +90,7 @@ private void compress(byte[] bytes, int off, int len, DataOutput out) throws IOE bytes, start, l, - this.compressionLevel + compressionLevel ); 
out.writeVInt(compressedSize); @@ -101,26 +100,29 @@ private void compress(byte[] bytes, int off, int len, DataOutput out) throws IOE @Override public void compress(ByteBuffersDataInput buffersInput, DataOutput out) throws IOException { - final int len = (int) buffersInput.size(); - byte[] bytes = new byte[len]; - buffersInput.readBytes(bytes, 0, len); - compress(bytes, 0, len, out); + final int length = (int) buffersInput.size(); + byte[] bytes = new byte[length]; + buffersInput.readBytes(bytes, 0, length); + compress(bytes, 0, length, out); } + + @Override + public void close() throws IOException {} } /** zstandard decompressor */ - private static final class ZSTDDecompressor extends Decompressor { + private static final class ZstdDecompressor extends Decompressor { private byte[] compressed; /** default decompressor */ - public ZSTDDecompressor() { + public ZstdDecompressor() { compressed = BytesRef.EMPTY_BYTES; } @Override public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException { - assert offset + length <= originalLength; + assert offset + length <= originalLength : "buffer read size must be within limit"; if (length == 0) { bytes.length = 0; @@ -164,12 +166,13 @@ public void decompress(DataInput in, int originalLength, int offset, int length, bytes.offset = offsetInBytesRef; bytes.length = length; - assert bytes.isValid(); + + assert bytes.isValid() : "decompression output is corrupted."; } @Override public Decompressor clone() { - return new ZSTDDecompressor(); + return new ZstdDecompressor(); } } } diff --git a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/package-info.java b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java similarity index 85% rename from sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/package-info.java rename to 
sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java index 8eecebb76e665..e996873963b1b 100644 --- a/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodec/package-info.java +++ b/sandbox/modules/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java @@ -9,4 +9,4 @@ /** * A plugin that implements compression codecs with native implementation. */ -package org.opensearch.index.codec.customcodec; +package org.opensearch.index.codec.customcodecs; diff --git a/sandbox/modules/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/sandbox/modules/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec index 4984c9bba7dc6..8b37d91cd8bc4 100644 --- a/sandbox/modules/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/sandbox/modules/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -1,2 +1,2 @@ -org.opensearch.index.codec.customcodec.ZstdCodec -org.opensearch.index.codec.customcodec.ZstdNoDictCodec +org.opensearch.index.codec.customcodecs.ZstdCodec +org.opensearch.index.codec.customcodecs.ZstdNoDictCodec diff --git a/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java new file mode 100644 index 0000000000000..63086c2ce22ad --- /dev/null +++ b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java @@ -0,0 +1,219 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.codec.customcodecs; + +import org.apache.lucene.tests.util.LineFileDocs; +import org.apache.lucene.tests.util.TestUtil; +import org.opensearch.test.OpenSearchTestCase; +import org.apache.lucene.codecs.compressing.Compressor; +import org.apache.lucene.codecs.compressing.Decompressor; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.ByteBuffersDataInput; +import org.apache.lucene.store.ByteBuffersDataOutput; +import org.apache.lucene.util.BytesRef; + +import java.util.List; +import java.nio.ByteBuffer; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Random; + +/** + * Test cases for compressors (based on {@link org.opensearch.common.compress.DeflateCompressTests}). + */ +public abstract class AbstractCompressorTests extends OpenSearchTestCase { + + abstract Compressor compressor(); + + abstract Decompressor decompressor(); + + public void testEmpty() throws IOException { + final byte[] bytes = "".getBytes(StandardCharsets.UTF_8); + doTest(bytes); + } + + public void testShortLiterals() throws IOException { + final byte[] bytes = "1234567345673456745608910123".getBytes(StandardCharsets.UTF_8); + doTest(bytes); + } + + public void testRandom() throws IOException { + Random r = random(); + for (int i = 0; i < 10; i++) { + final byte[] bytes = new byte[TestUtil.nextInt(r, 1, 100000)]; + r.nextBytes(bytes); + doTest(bytes); + } + } + + public void testLineDocs() throws IOException { + Random r = random(); + LineFileDocs lineFileDocs = new LineFileDocs(r); + for (int i = 0; i < 10; i++) { + int numDocs = TestUtil.nextInt(r, 1, 200); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + for (int j = 0; j < numDocs; j++) { + String s = lineFileDocs.nextDoc().get("body"); + bos.write(s.getBytes(StandardCharsets.UTF_8)); + } + doTest(bos.toByteArray()); + } + lineFileDocs.close(); + } + + public void testRepetitionsL() 
throws IOException { + Random r = random(); + for (int i = 0; i < 10; i++) { + int numLongs = TestUtil.nextInt(r, 1, 10000); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + long theValue = r.nextLong(); + for (int j = 0; j < numLongs; j++) { + if (r.nextInt(10) == 0) { + theValue = r.nextLong(); + } + bos.write((byte) (theValue >>> 56)); + bos.write((byte) (theValue >>> 48)); + bos.write((byte) (theValue >>> 40)); + bos.write((byte) (theValue >>> 32)); + bos.write((byte) (theValue >>> 24)); + bos.write((byte) (theValue >>> 16)); + bos.write((byte) (theValue >>> 8)); + bos.write((byte) theValue); + } + doTest(bos.toByteArray()); + } + } + + public void testRepetitionsI() throws IOException { + Random r = random(); + for (int i = 0; i < 10; i++) { + int numInts = TestUtil.nextInt(r, 1, 20000); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int theValue = r.nextInt(); + for (int j = 0; j < numInts; j++) { + if (r.nextInt(10) == 0) { + theValue = r.nextInt(); + } + bos.write((byte) (theValue >>> 24)); + bos.write((byte) (theValue >>> 16)); + bos.write((byte) (theValue >>> 8)); + bos.write((byte) theValue); + } + doTest(bos.toByteArray()); + } + } + + public void testRepetitionsS() throws IOException { + Random r = random(); + for (int i = 0; i < 10; i++) { + int numShorts = TestUtil.nextInt(r, 1, 40000); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + short theValue = (short) r.nextInt(65535); + for (int j = 0; j < numShorts; j++) { + if (r.nextInt(10) == 0) { + theValue = (short) r.nextInt(65535); + } + bos.write((byte) (theValue >>> 8)); + bos.write((byte) theValue); + } + doTest(bos.toByteArray()); + } + } + + public void testMixed() throws IOException { + Random r = random(); + LineFileDocs lineFileDocs = new LineFileDocs(r); + for (int i = 0; i < 2; ++i) { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int prevInt = r.nextInt(); + long prevLong = r.nextLong(); + while (bos.size() < 400000) { + switch 
(r.nextInt(4)) { + case 0: + addInt(r, prevInt, bos); + break; + case 1: + addLong(r, prevLong, bos); + break; + case 2: + addString(lineFileDocs, bos); + break; + case 3: + addBytes(r, bos); + break; + default: + throw new IllegalStateException("Random is broken"); + } + } + doTest(bos.toByteArray()); + } + } + + private void addLong(Random r, long prev, ByteArrayOutputStream bos) { + long theValue = prev; + if (r.nextInt(10) != 0) { + theValue = r.nextLong(); + } + bos.write((byte) (theValue >>> 56)); + bos.write((byte) (theValue >>> 48)); + bos.write((byte) (theValue >>> 40)); + bos.write((byte) (theValue >>> 32)); + bos.write((byte) (theValue >>> 24)); + bos.write((byte) (theValue >>> 16)); + bos.write((byte) (theValue >>> 8)); + bos.write((byte) theValue); + } + + private void addInt(Random r, int prev, ByteArrayOutputStream bos) { + int theValue = prev; + if (r.nextInt(10) != 0) { + theValue = r.nextInt(); + } + bos.write((byte) (theValue >>> 24)); + bos.write((byte) (theValue >>> 16)); + bos.write((byte) (theValue >>> 8)); + bos.write((byte) theValue); + } + + private void addString(LineFileDocs lineFileDocs, ByteArrayOutputStream bos) throws IOException { + String s = lineFileDocs.nextDoc().get("body"); + bos.write(s.getBytes(StandardCharsets.UTF_8)); + } + + private void addBytes(Random r, ByteArrayOutputStream bos) throws IOException { + byte bytes[] = new byte[TestUtil.nextInt(r, 1, 10000)]; + r.nextBytes(bytes); + bos.write(bytes); + } + + private void doTest(byte[] bytes) throws IOException { + final int length = bytes.length; + + ByteBuffersDataInput in = new ByteBuffersDataInput(List.of(ByteBuffer.wrap(bytes))); + ByteBuffersDataOutput out = new ByteBuffersDataOutput(); + + // let's compress + Compressor compressor = compressor(); + compressor.compress(in, out); + byte[] compressed = out.toArrayCopy(); + + // let's decompress + BytesRef outbytes = new BytesRef(); + Decompressor decompressor = decompressor(); + decompressor.decompress(new 
ByteArrayDataInput(compressed), length, 0, length, outbytes); + + // get the uncompressed array out of outbytes + byte[] restored = new byte[outbytes.length]; + System.arraycopy(outbytes.bytes, 0, restored, 0, outbytes.length); + + assertArrayEquals(bytes, restored); + } + +} diff --git a/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java deleted file mode 100644 index 6833b77ce890a..0000000000000 --- a/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.index.codec.customcodec; - -import org.apache.logging.log4j.LogManager; -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.SegmentReader; -import org.apache.lucene.store.Directory; -import org.apache.lucene.tests.util.LuceneTestCase; -import org.opensearch.common.settings.Settings; -import org.opensearch.env.Environment; -import org.opensearch.index.IndexSettings; -import org.opensearch.index.analysis.IndexAnalyzers; -import org.opensearch.index.codec.CodecService; -import org.opensearch.index.mapper.MapperService; -import org.opensearch.index.similarity.SimilarityService; -import org.opensearch.indices.mapper.MapperRegistry; -import org.opensearch.plugins.MapperPlugin; -import org.opensearch.test.IndexSettingsModule; -import org.opensearch.test.OpenSearchTestCase; - -import java.io.IOException; -import java.util.Collections; - 
-@LuceneTestCase.SuppressCodecs("*") // we test against default codec so never get a random one here! -public class CustomCodecTests extends OpenSearchTestCase { - - public void testZstdCompression() throws Exception { - Codec codec = createCodecService().codec("ZSTD"); - assertStoredFieldsCustomCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); - } - - public void testZstdNoDictCompression() throws Exception { - Codec codec = createCodecService().codec("ZSTDNODICT"); - assertStoredFieldsCustomCompressionEquals(Lucene95CustomCodec.Mode.ZSTDNODICT, codec); - } - - private void assertStoredFieldsCustomCompressionEquals(Lucene95CustomCodec.Mode expected, Codec actual) throws Exception { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(null); - iwc.setCodec(actual); - IndexWriter iw = new IndexWriter(dir, iwc); - iw.addDocument(new Document()); - iw.commit(); - iw.close(); - DirectoryReader ir = DirectoryReader.open(dir); - SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader(); - String v = sr.getSegmentInfo().info.getAttribute(Lucene95CustomStoredFieldsFormat.MODE_KEY); - assertNotNull(v); - assertEquals(expected, Lucene95CustomCodec.Mode.valueOf(v)); - ir.close(); - dir.close(); - } - - private CodecService createCodecService() throws IOException { - Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); - IndexSettings settings = IndexSettingsModule.newIndexSettings("_na", nodeSettings); - SimilarityService similarityService = new SimilarityService(settings, null, Collections.emptyMap()); - IndexAnalyzers indexAnalyzers = createTestAnalysis(settings, nodeSettings).indexAnalyzers; - MapperRegistry mapperRegistry = new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER); - MapperService service = new MapperService( - settings, - indexAnalyzers, - xContentRegistry(), - similarityService, - mapperRegistry, - () -> null, - () 
-> false, - null - ); - return new CodecService(service, LogManager.getLogger("test")); - } -} diff --git a/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java new file mode 100644 index 0000000000000..78cf62c08f889 --- /dev/null +++ b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.codec.customcodecs; + +import org.apache.lucene.codecs.compressing.Compressor; +import org.apache.lucene.codecs.compressing.Decompressor; + +/** + * Test ZSTD compression (with dictionary enabled) + */ +public class ZstdCompressorTests extends AbstractCompressorTests { + + private final Compressor compressor = new ZstdCompressionMode().newCompressor(); + private final Decompressor decompressor = new ZstdCompressionMode().newDecompressor(); + + @Override + Compressor compressor() { + return compressor; + } + + @Override + Decompressor decompressor() { + return decompressor; + } +} diff --git a/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java new file mode 100644 index 0000000000000..2eda81a6af2ab --- /dev/null +++ b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible 
open source license. + */ +package org.opensearch.index.codec.customcodecs; + +import org.apache.lucene.codecs.compressing.Compressor; +import org.apache.lucene.codecs.compressing.Decompressor; + +/** + * Test ZSTD compression (with no dictionary). + */ +public class ZstdNoDictCompressorTests extends AbstractCompressorTests { + + private final Compressor compressor = new ZstdNoDictCompressionMode().newCompressor(); + private final Decompressor decompressor = new ZstdNoDictCompressionMode().newDecompressor(); + + @Override + Compressor compressor() { + return compressor; + } + + @Override + Decompressor decompressor() { + return decompressor; + } +} From ac1bd955c371f6d2b6f514ab378f31667a3aa7ad Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Tue, 21 Mar 2023 21:34:47 -0700 Subject: [PATCH 23/25] Remove ES grant in plugin-security.policy. Fix minor javadoc issue. - Remove ES grant in plugin-security.policy file. - Replace @link and @See to fix javadoc error. Signed-off-by: Mulugeta Mammo --- .../plugin-metadata/plugin-security.policy | 24 ------------------- .../customcodecs/AbstractCompressorTests.java | 2 +- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/sandbox/modules/custom-codecs/src/main/plugin-metadata/plugin-security.policy b/sandbox/modules/custom-codecs/src/main/plugin-metadata/plugin-security.policy index 737ccb0e939f3..8161010cfa897 100644 --- a/sandbox/modules/custom-codecs/src/main/plugin-metadata/plugin-security.policy +++ b/sandbox/modules/custom-codecs/src/main/plugin-metadata/plugin-security.policy @@ -6,30 +6,6 @@ * compatible open source license. */ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - grant codeBase "${codebase.zstd-jni}" { permission java.lang.RuntimePermission "loadLibrary.*"; }; diff --git a/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java index 63086c2ce22ad..fcfb06ca6b050 100644 --- a/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java +++ b/sandbox/modules/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java @@ -26,7 +26,7 @@ import java.util.Random; /** - * Test cases for compressors (based on {@link org.opensearch.common.compress.DeflateCompressTests}). + * Test cases for compressors (based on {@See org.opensearch.common.compress.DeflateCompressTests}). */ public abstract class AbstractCompressorTests extends OpenSearchTestCase { From b48119df746a681c1258fa4c02c52c29980ca7be Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Tue, 28 Mar 2023 13:10:45 -0700 Subject: [PATCH 24/25] Upgrade jettison version to 1.5.4. 
Signed-off-by: Mulugeta Mammo --- buildSrc/version.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildSrc/version.properties b/buildSrc/version.properties index f2a91c6740f5b..b7a7f3e0e6827 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -17,7 +17,7 @@ supercsv = 2.4.0 log4j = 2.17.1 slf4j = 1.7.36 asm = 9.4 -jettison = 1.5.3 +jettison = 1.5.4 woodstox = 6.4.0 kotlin = 1.7.10 antlr4 = 4.11.1 From bc59912f34268541cc61edfc3f0224e5a3b0be0b Mon Sep 17 00:00:00 2001 From: Mulugeta Mammo Date: Tue, 28 Mar 2023 13:40:00 -0700 Subject: [PATCH 25/25] Update SHA for jettison 1.5.4. Signed-off-by: Mulugeta Mammo --- plugins/discovery-azure-classic/licenses/jettison-1.5.3.jar.sha1 | 1 - plugins/discovery-azure-classic/licenses/jettison-1.5.4.jar.sha1 | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 plugins/discovery-azure-classic/licenses/jettison-1.5.3.jar.sha1 create mode 100644 plugins/discovery-azure-classic/licenses/jettison-1.5.4.jar.sha1 diff --git a/plugins/discovery-azure-classic/licenses/jettison-1.5.3.jar.sha1 b/plugins/discovery-azure-classic/licenses/jettison-1.5.3.jar.sha1 deleted file mode 100644 index afd13439e739c..0000000000000 --- a/plugins/discovery-azure-classic/licenses/jettison-1.5.3.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -964d35bbdecbbc33cf2f2044e8a1648d9f6f1474 \ No newline at end of file diff --git a/plugins/discovery-azure-classic/licenses/jettison-1.5.4.jar.sha1 b/plugins/discovery-azure-classic/licenses/jettison-1.5.4.jar.sha1 new file mode 100644 index 0000000000000..a87b7691bfce8 --- /dev/null +++ b/plugins/discovery-azure-classic/licenses/jettison-1.5.4.jar.sha1 @@ -0,0 +1 @@ +174ca56c411b05aec323d8f53e66709c0d28b337 \ No newline at end of file