diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java index e4466ca2cba5c..114d07589b0c0 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java @@ -8,11 +8,7 @@ package org.opensearch.index.remote; -import org.apache.lucene.index.CorruptIndexException; - import java.util.Arrays; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * Utils for remote store @@ -54,23 +50,23 @@ public static long invertLong(String str) { } /** - * Extracts the Lucene major version from the provided DocValuesUpdates file name - * @param filename DocValuesUpdates file name to parse - * @return Lucene major version that wrote the DocValuesUpdates file - * @throws CorruptIndexException If the Lucene major version cannot be inferred + * Extracts the segment name from the provided segment file name + * @param filename Segment file name to parse + * @return Name of the segment that the segment file belongs to */ - public static int getLuceneVersionForDocValuesUpdates(String filename) throws CorruptIndexException { - // TODO: The following regex could work incorrectly if both major and minor versions are double-digits. - // This is because the major and minor versions do not have a separator in the filename currently - // (Lucence). - // We may need to revisit this if the filename pattern is updated in future Lucene versions. - Pattern docValuesUpdatesFileNamePattern = Pattern.compile("_\\d+_\\d+_Lucene(\\d+)\\d+_\\d+"); + public static String getSegmentName(String filename) { + // Segment file names follow patterns like "_0.cfe" or "_0_1_Lucene90_0.dvm". + // Here, the segment name is "_0", which is the set of characters + // starting with "_" until the next "_" or first ".". + int endIdx = filename.indexOf('_', 1); + if (endIdx == -1) { + endIdx = filename.indexOf('.'); + } - Matcher matcher = docValuesUpdatesFileNamePattern.matcher(filename); - if (matcher.find()) { - return Integer.parseInt(matcher.group(1)); - } else { - throw new CorruptIndexException("Unable to infer Lucene version for segment file " + filename, filename); + if (endIdx == -1) { + throw new IllegalArgumentException("Unable to infer segment name for segment file " + filename); } + + return filename.substring(0, endIdx); } } diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index 05cb3a4ab5a43..8ee267cb67e68 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -16,7 +16,6 @@ import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.SegmentCommitInfo; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.ByteBuffersIndexOutput; @@ -624,22 +623,12 @@ public void uploadMetadata( ); try { try (IndexOutput indexOutput = storeDirectory.createOutput(metadataFilename, IOContext.DEFAULT)) { - Map segmentToLuceneVersion = getSegmentToLuceneVersion(segmentInfosSnapshot); + Map segmentToLuceneVersion = getSegmentToLuceneVersion(segmentFiles, segmentInfosSnapshot); Map uploadedSegments = new HashMap<>(); for (String file : segmentFiles) { if (segmentsUploadedToRemoteStore.containsKey(file)) { UploadedSegmentMetadata metadata = segmentsUploadedToRemoteStore.get(file); - if (segmentToLuceneVersion.containsKey(metadata.originalFilename)) { - metadata.setWrittenByMajor(segmentToLuceneVersion.get(metadata.originalFilename)); - } else if (metadata.originalFilename.equals(segmentInfosSnapshot.getSegmentsFileName())) { - metadata.setWrittenByMajor(segmentInfosSnapshot.getCommitLuceneVersion().major); - } else { - throw new CorruptIndexException( - "Lucene version is missing for segment file " + metadata.originalFilename, - metadata.originalFilename - ); - } - + metadata.setWrittenByMajor(segmentToLuceneVersion.get(metadata.originalFilename)); uploadedSegments.put(file, metadata.toString()); } else { throw new NoSuchFileException(file); @@ -671,12 +660,13 @@ public void uploadMetadata( } /** - * Parses the provided SegmenttInfos to retrieve a mapping of segment files to the respective Lucene major version that wrote the segments - * @param segmentInfosSnapshot SegmenttInfos instance to parse - * @return Map of segment file name to the Lucene major version that wrote it - * @throws CorruptIndexException If the Lucene major version cannot be parsed for a segment file + * Parses the provided SegmentInfos to retrieve a mapping of the provided segment files to + * the respective Lucene major version that wrote the segments + * @param segmentFiles List of segment files for which the Lucene major version is needed + * @param segmentInfosSnapshot SegmentInfos instance to parse + * @return Map of the segment file to its Lucene major version */ - private Map getSegmentToLuceneVersion(SegmentInfos segmentInfosSnapshot) throws CorruptIndexException { + private Map getSegmentToLuceneVersion(Collection segmentFiles, SegmentInfos segmentInfosSnapshot) { Map segmentToLuceneVersion = new HashMap<>(); for (SegmentCommitInfo segmentCommitInfo : segmentInfosSnapshot) { SegmentInfo info = segmentCommitInfo.info; @@ -684,21 +674,16 @@ private Map getSegmentToLuceneVersion(SegmentInfos segmentInfos for (String file : segFiles) { segmentToLuceneVersion.put(file, info.getVersion().major); } + } - int docValuesUpdatesLuceneMajorVersion = -1; - Map> docValuesUpdatesFiles = segmentCommitInfo.getDocValuesUpdatesFiles(); - for (int key : docValuesUpdatesFiles.keySet()) { - for (String file : docValuesUpdatesFiles.get(key)) { - if (docValuesUpdatesLuceneMajorVersion == -1) { - docValuesUpdatesLuceneMajorVersion = RemoteStoreUtils.getLuceneVersionForDocValuesUpdates(file); - } - - segmentToLuceneVersion.put(file, docValuesUpdatesLuceneMajorVersion); - } - - Set fieldInfosFiles = segmentCommitInfo.getFieldInfosFiles(); - for (String file : fieldInfosFiles) { - segmentToLuceneVersion.put(file, docValuesUpdatesLuceneMajorVersion); + for (String file : segmentFiles) { + if (segmentToLuceneVersion.containsKey(file) == false) { + if (file.equals(segmentInfosSnapshot.getSegmentsFileName())) { + segmentToLuceneVersion.put(file, segmentInfosSnapshot.getCommitLuceneVersion().major); + } else { + // Fallback to the Lucene major version of the respective segment's .si file + String segmentInfoFileName = RemoteStoreUtils.getSegmentName(file) + ".si"; + segmentToLuceneVersion.put(file, segmentToLuceneVersion.get(segmentInfoFileName)); } } } diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java index 130ee86d405ea..9afa75dd601b2 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java @@ -8,7 +8,6 @@ package org.opensearch.index.remote; -import org.apache.lucene.index.CorruptIndexException; import org.opensearch.test.OpenSearchTestCase; public class RemoteStoreUtilsTests extends OpenSearchTestCase { @@ -40,11 +39,25 @@ public void testinvert() { } } - public void testGetLuceneVersionForDocValuesUpdates() throws CorruptIndexException { - assertEquals(9, RemoteStoreUtils.getLuceneVersionForDocValuesUpdates("_0_1_Lucene90_0.dvm")); + public void testGetSegmentNameForCfeFile() { + assertEquals("_foo", RemoteStoreUtils.getSegmentName("_foo.cfe")); } - public void testGetLuceneVersionForDocValuesUpdatesException() { - assertThrows(CorruptIndexException.class, () -> RemoteStoreUtils.getLuceneVersionForDocValuesUpdates("_0_1_Asserting_0.dvm")); + public void testGetSegmentNameForDvmFile() { + assertEquals("_bar", RemoteStoreUtils.getSegmentName("_bar_1_Lucene90_0.dvm")); + } + + public void testGetSegmentNameWeirdSegmentNameOnlyUnderscore() { + // Validate behaviour when segment name contains delimiters only + assertEquals("_", RemoteStoreUtils.getSegmentName("_.dvm")); + } + + public void testGetSegmentNameUnderscoreDelimiterOverrides() { + // Validate behaviour when segment name contains delimiters only + assertEquals("_", RemoteStoreUtils.getSegmentName("___.dvm")); + } + + public void testGetSegmentNameException() { + assertThrows(IllegalArgumentException.class, () -> RemoteStoreUtils.getSegmentName("dvd")); } }