From 6995db418bfbd4558d266bf40c29a1f19c05f1e4 Mon Sep 17 00:00:00 2001 From: Suraj Singh Date: Wed, 31 Aug 2022 19:45:49 -0700 Subject: [PATCH 1/2] [Segment Replication] Bump segment infos counter before commit during replica promotion Signed-off-by: Suraj Singh --- .../opensearch/index/engine/NRTReplicationEngine.java | 9 +++++++++ .../index/engine/NRTReplicationEngineTests.java | 2 ++ 2 files changed, 11 insertions(+) diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index 6f5b7030ed65f..cf753e3360c39 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -54,6 +54,8 @@ public class NRTReplicationEngine extends Engine { private final LocalCheckpointTracker localCheckpointTracker; private final WriteOnlyTranslogManager translogManager; + private static final int SI_COUNTER_INCREMENT = 10; + public NRTReplicationEngine(EngineConfig engineConfig) { super(engineConfig); store.incRef(); @@ -142,6 +144,13 @@ public synchronized void updateSegments(final SegmentInfos infos, long seqNo) th public void commitSegmentInfos() throws IOException { // TODO: This method should wait for replication events to finalize. final SegmentInfos latestSegmentInfos = getLatestSegmentInfos(); + /* + This is a workaround solution which decreases the chances of conflict on replica nodes when same file is copied + from two different primaries during failover. Increasing counter helps in avoiding this conflict as counter is + used to generate new segment file names. The ideal solution is to identify the counter from previous primary. + */ + latestSegmentInfos.counter = latestSegmentInfos.counter + SI_COUNTER_INCREMENT; + latestSegmentInfos.changed(); store.commitSegmentInfos(latestSegmentInfos, localCheckpointTracker.getMaxSeqNo(), localCheckpointTracker.getProcessedCheckpoint()); translogManager.syncTranslog(); } diff --git a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java index 1fe1a37dedae0..f1b3b57657caa 100644 --- a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java @@ -248,6 +248,8 @@ public void testCommitSegmentInfos() throws Exception { // ensure getLatestSegmentInfos returns an updated infos ref with correct userdata. final SegmentInfos latestSegmentInfos = nrtEngine.getLatestSegmentInfos(); assertEquals(previousInfos.getGeneration(), latestSegmentInfos.getLastGeneration()); + assertEquals(previousInfos.getVersion(), latestSegmentInfos.getVersion()); + assertEquals(previousInfos.counter, latestSegmentInfos.counter); Map userData = latestSegmentInfos.getUserData(); assertEquals(processedCheckpoint, localCheckpointTracker.getProcessedCheckpoint()); assertEquals(maxSeqNo, Long.parseLong(userData.get(MAX_SEQ_NO))); From 5a19bd2e2f63145bb3b3e0135dd0e4635427a5e6 Mon Sep 17 00:00:00 2001 From: Suraj Singh Date: Wed, 31 Aug 2022 20:10:13 -0700 Subject: [PATCH 2/2] Add changelog entry Signed-off-by: Suraj Singh --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d07052d55ff0..fe22ae688d668 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Add timeout on Mockito.verify to reduce flakyness in testReplicationOnDone test([#4314](https://github.com/opensearch-project/OpenSearch/pull/4314)) - Commit workflow for dependabot changelog helper ([#4331](https://github.com/opensearch-project/OpenSearch/pull/4331)) - Fixed cancellation of segment replication events ([#4225](https://github.com/opensearch-project/OpenSearch/pull/4225)) +- [Segment Replication] Bump segment infos counter before commit during replica promotion ([#4365](https://github.com/opensearch-project/OpenSearch/pull/4365)) ### Security - CVE-2022-25857 org.yaml:snakeyaml DOS vulnerability ([#4341](https://github.com/opensearch-project/OpenSearch/pull/4341))