diff --git a/CHANGELOG.md b/CHANGELOG.md index f9e0c18d5a81c..e168c79158a7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - PR reference to checkout code for changelog verifier ([#4296](https://github.com/opensearch-project/OpenSearch/pull/4296)) - Restore using the class ClusterInfoRequest and ClusterInfoRequestBuilder from package 'org.opensearch.action.support.master.info' for subclasses ([#4324](https://github.com/opensearch-project/OpenSearch/pull/4324)) - Fixed cancellation of segment replication events ([#4225](https://github.com/opensearch-project/OpenSearch/pull/4225)) +- [Segment Replication] Bump segment infos counter before commit during replica promotion ([#4365](https://github.com/opensearch-project/OpenSearch/pull/4365)) ### Security diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index 04385c4ac6c3b..4cfadeba6f7de 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -56,6 +56,8 @@ public class NRTReplicationEngine extends Engine implements LifecycleAware { private final LocalCheckpointTracker localCheckpointTracker; private final WriteOnlyTranslogManager translogManager; + private static final int SI_COUNTER_INCREMENT = 10; + public NRTReplicationEngine(EngineConfig engineConfig) { super(engineConfig); store.incRef(); @@ -142,6 +144,13 @@ public synchronized void updateSegments(final SegmentInfos infos, long seqNo) th public void commitSegmentInfos() throws IOException { // TODO: This method should wait for replication events to finalize. final SegmentInfos latestSegmentInfos = getLatestSegmentInfos(); + /* + This is a workaround solution which decreases the chances of conflict on replica nodes when same file is copied + from two different primaries during failover. Increasing counter helps in avoiding this conflict as counter is + used to generate new segment file names. The ideal solution is to identify the counter from previous primary. + */ + latestSegmentInfos.counter = latestSegmentInfos.counter + SI_COUNTER_INCREMENT; + latestSegmentInfos.changed(); store.commitSegmentInfos(latestSegmentInfos, localCheckpointTracker.getMaxSeqNo(), localCheckpointTracker.getProcessedCheckpoint()); translogManager.syncTranslog(); } diff --git a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java index de5198de9a29e..5e544b47f6633 100644 --- a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java @@ -245,6 +245,8 @@ public void testCommitSegmentInfos() throws Exception { // ensure getLatestSegmentInfos returns an updated infos ref with correct userdata. final SegmentInfos latestSegmentInfos = nrtEngine.getLatestSegmentInfos(); assertEquals(previousInfos.getGeneration(), latestSegmentInfos.getLastGeneration()); + assertEquals(previousInfos.getVersion(), latestSegmentInfos.getVersion()); + assertEquals(previousInfos.counter, latestSegmentInfos.counter); Map userData = latestSegmentInfos.getUserData(); assertEquals(processedCheckpoint, localCheckpointTracker.getProcessedCheckpoint()); assertEquals(maxSeqNo, Long.parseLong(userData.get(MAX_SEQ_NO)));