diff --git a/CHANGELOG.md b/CHANGELOG.md index 76f134f10c29e..48d320dd5bce6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Add timeout on Mockito.verify to reduce flakyness in testReplicationOnDone test([#4314](https://github.com/opensearch-project/OpenSearch/pull/4314)) - Commit workflow for dependabot changelog helper ([#4331](https://github.com/opensearch-project/OpenSearch/pull/4331)) - Fixed cancellation of segment replication events ([#4225](https://github.com/opensearch-project/OpenSearch/pull/4225)) +- [Segment Replication] Bump segment infos counter before commit during replica promotion ([#4365](https://github.com/opensearch-project/OpenSearch/pull/4365)) - Bugs for dependabot changelog verifier workflow ([#4364](https://github.com/opensearch-project/OpenSearch/pull/4364)) - Fix flaky random test `NRTReplicationEngineTests.testUpdateSegments` ([#4352](https://github.com/opensearch-project/OpenSearch/pull/4352)) diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index 6f5b7030ed65f..cf753e3360c39 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -54,6 +54,8 @@ public class NRTReplicationEngine extends Engine { private final LocalCheckpointTracker localCheckpointTracker; private final WriteOnlyTranslogManager translogManager; + private static final int SI_COUNTER_INCREMENT = 10; + public NRTReplicationEngine(EngineConfig engineConfig) { super(engineConfig); store.incRef(); @@ -142,6 +144,13 @@ public synchronized void updateSegments(final SegmentInfos infos, long seqNo) th public void commitSegmentInfos() throws IOException { // TODO: This method should wait for replication events to finalize. final SegmentInfos latestSegmentInfos = getLatestSegmentInfos(); + /* + This is a workaround solution which decreases the chances of conflict on replica nodes when same file is copied + from two different primaries during failover. Increasing counter helps in avoiding this conflict as counter is + used to generate new segment file names. The ideal solution is to identify the counter from previous primary. + */ + latestSegmentInfos.counter = latestSegmentInfos.counter + SI_COUNTER_INCREMENT; + latestSegmentInfos.changed(); store.commitSegmentInfos(latestSegmentInfos, localCheckpointTracker.getMaxSeqNo(), localCheckpointTracker.getProcessedCheckpoint()); translogManager.syncTranslog(); } diff --git a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java index 0008afcc901c7..540054782133a 100644 --- a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java @@ -252,6 +252,8 @@ public void testCommitSegmentInfos() throws Exception { // ensure getLatestSegmentInfos returns an updated infos ref with correct userdata. final SegmentInfos latestSegmentInfos = nrtEngine.getLatestSegmentInfos(); assertEquals(previousInfos.getGeneration(), latestSegmentInfos.getLastGeneration()); + assertEquals(previousInfos.getVersion(), latestSegmentInfos.getVersion()); + assertEquals(previousInfos.counter, latestSegmentInfos.counter); Map userData = latestSegmentInfos.getUserData(); assertEquals(processedCheckpoint, localCheckpointTracker.getProcessedCheckpoint()); assertEquals(maxSeqNo, Long.parseLong(userData.get(MAX_SEQ_NO)));