From 9136d86c3a9db41c8b1fdcd4cce04bb7fff5f9ba Mon Sep 17 00:00:00 2001 From: vikasvb90 Date: Fri, 11 Oct 2024 17:39:36 +0530 Subject: [PATCH] Handling No file exception due to absence of remote metadata file --- ...nPlaceShardSplitRecoverySourceHandler.java | 46 ++++++++++++------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/recovery/inplacesplit/InPlaceShardSplitRecoverySourceHandler.java b/server/src/main/java/org/opensearch/indices/recovery/inplacesplit/InPlaceShardSplitRecoverySourceHandler.java index 6ec47c08d9066..15fca1f73fed0 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/inplacesplit/InPlaceShardSplitRecoverySourceHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/inplacesplit/InPlaceShardSplitRecoverySourceHandler.java @@ -41,6 +41,7 @@ import java.io.Closeable; import java.io.IOException; +import java.nio.file.NoSuchFileException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -133,26 +134,17 @@ protected void innerRecoveryToTarget(ActionListener listener, resources.addAll(delayedStaleCommitDeleteOps); GatedCloseable translogRetentionLock = sourceShard.acquireRetentionLockWithMinGen(); resources.add(translogRetentionLock); - // Make sure that all operations before acquired translog generation are present in the last commit. - // In remote store replication mode refreshed but not flushed ops are also trimmed from translog and hence, - // a flush is required to ensure that all operations before the acquired translog are present in the local commit. - // Also, a refresh is done as part of flush and therefore, we can expect commit to be present in remote store - // as well. - sourceShard.flush(new FlushRequest().waitIfOngoing(true).force(true)); - Releasable releaseStore = acquireStore(sourceShard.store()); resources.add(releaseStore); - GatedCloseable lastCommit = acquireLastCommit(sourceShard,false); - resources.add(lastCommit); - Tuple fetchedMetadataTuple = null; - if (sourceShard.remoteStore() != null) { - fetchedMetadataTuple = sourceShard.getMetadataContentForCommit( - sourceShard.getOperationPrimaryTerm(), - lastCommit.get().getGeneration()); - ensureMetadataHasAllSegmentsFromCommit(lastCommit.get(), fetchedMetadataTuple.v2()); + GatedCloseable lastCommit; + try { + lastCommit = acquireCommitAndFetchMetadata(translogRetentionLock); + } catch (NoSuchFileException ex) { + // Handling of a known issue in remote store flow https://github.com/opensearch-project/OpenSearch/pull/10341 + logger.warn("Exception while acquiring commit and fetching metadata", ex); + lastCommit = acquireCommitAndFetchMetadata(translogRetentionLock); } - splitCommitMetadata.set(new SplitCommitMetadata(translogRetentionLock.get(), fetchedMetadataTuple)); final StepListener sendFileStep = new StepListener<>(); final StepListener prepareEngineStep = new StepListener<>(); @@ -205,6 +197,28 @@ protected void innerRecoveryToTarget(ActionListener listener, finalizeStepAndCompleteFuture(startingSeqNo, sendSnapshotStep, sendFileStepWithEmptyResult(), prepareEngineStep, finalizeStep, onFailure); } + private GatedCloseable acquireCommitAndFetchMetadata(GatedCloseable translogRetentionLock) throws IOException { + // Make sure that all operations before acquired translog generation are present in the last commit. + // In remote store replication mode refreshed but not flushed ops are also trimmed from translog and hence, + // a flush is required to ensure that all operations before the acquired translog are present in the local commit. + // Also, a refresh is done as part of flush and therefore, we can expect commit to be present in remote store + // as well. + sourceShard.flush(new FlushRequest().waitIfOngoing(true).force(true)); + + GatedCloseable lastCommit = acquireLastCommit(sourceShard,false); + resources.add(lastCommit); + + Tuple fetchedMetadataTuple = null; + if (sourceShard.remoteStore() != null) { + fetchedMetadataTuple = sourceShard.getMetadataContentForCommit( + sourceShard.getOperationPrimaryTerm(), + lastCommit.get().getGeneration()); + ensureMetadataHasAllSegmentsFromCommit(lastCommit.get(), fetchedMetadataTuple.v2()); + } + splitCommitMetadata.set(new SplitCommitMetadata(translogRetentionLock.get(), fetchedMetadataTuple)); + return lastCommit; + } + private void ensureMetadataHasAllSegmentsFromCommit(IndexCommit indexCommit, RemoteSegmentMetadata metadata) throws IOException { List missingFiles = new ArrayList<>(); for (String file : indexCommit.getFileNames()) {