-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Remote Store] Change behaviour in replica recovery for remote translog enabled indices #4318
Changes from all commits
6b4ce75
687e12f
5773fd8
c7e8464
fae774c
94d5cf7
60dfb37
9d813b0
6c4f93b
4eda9b0
3bacd4c
b2ada8e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -163,8 +163,8 @@ | |
import org.opensearch.indices.recovery.RecoveryListener; | ||
import org.opensearch.indices.recovery.RecoveryState; | ||
import org.opensearch.indices.recovery.RecoveryTarget; | ||
import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; | ||
import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; | ||
import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; | ||
import org.opensearch.repositories.RepositoriesService; | ||
import org.opensearch.repositories.Repository; | ||
import org.opensearch.rest.RestStatus; | ||
|
@@ -203,6 +203,7 @@ | |
import java.util.stream.StreamSupport; | ||
|
||
import static org.opensearch.index.seqno.RetentionLeaseActions.RETAIN_ALL; | ||
import static org.opensearch.index.seqno.SequenceNumbers.MAX_SEQ_NO; | ||
import static org.opensearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; | ||
|
||
/** | ||
|
@@ -1703,13 +1704,8 @@ public void prepareForIndexRecovery() { | |
* @return a sequence number that an operation-based peer recovery can start with. | ||
* This is the first operation after the local checkpoint of the safe commit if exists. | ||
*/ | ||
public long recoverLocallyUpToGlobalCheckpoint() { | ||
assert Thread.holdsLock(mutex) == false : "recover locally under mutex"; | ||
if (state != IndexShardState.RECOVERING) { | ||
throw new IndexShardNotRecoveringException(shardId, state); | ||
} | ||
recoveryState.validateCurrentStage(RecoveryState.Stage.INDEX); | ||
assert routingEntry().recoverySource().getType() == RecoverySource.Type.PEER : "not a peer recovery [" + routingEntry() + "]"; | ||
private long recoverLocallyUpToGlobalCheckpoint() { | ||
validateLocalRecoveryState(); | ||
final Optional<SequenceNumbers.CommitInfo> safeCommit; | ||
final long globalCheckpoint; | ||
try { | ||
|
@@ -1792,6 +1788,54 @@ public long recoverLocallyUpToGlobalCheckpoint() { | |
} | ||
} | ||
|
||
public long recoverLocallyAndFetchStartSeqNo(boolean localTranslog) { | ||
if (localTranslog) { | ||
return recoverLocallyUpToGlobalCheckpoint(); | ||
} else { | ||
return recoverLocallyUptoLastCommit(); | ||
} | ||
} | ||
ashking94 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/** | ||
* The method figures out the sequence number basis the last commit. | ||
* | ||
* @return the starting sequence number from which the recovery should start. | ||
*/ | ||
private long recoverLocallyUptoLastCommit() { | ||
assert isRemoteTranslogEnabled() : "Remote translog store is not enabled"; | ||
long seqNo; | ||
validateLocalRecoveryState(); | ||
|
||
try { | ||
seqNo = Long.parseLong(store.readLastCommittedSegmentsInfo().getUserData().get(MAX_SEQ_NO)); | ||
} catch (org.apache.lucene.index.IndexNotFoundException e) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: will prefer import. Also add an assertion that this will be called on remote translog enabled indices There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you mean to use static import for MAX_SEQ_NO? If so, made that change. Added the assertions. |
||
logger.error("skip local recovery as no index commit found", e); | ||
return UNASSIGNED_SEQ_NO; | ||
} catch (Exception e) { | ||
logger.error("skip local recovery as failed to find the safe commit", e); | ||
return UNASSIGNED_SEQ_NO; | ||
} | ||
|
||
try { | ||
maybeCheckIndex(); | ||
recoveryState.setStage(RecoveryState.Stage.TRANSLOG); | ||
ashking94 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
recoveryState.getTranslog().totalLocal(0); | ||
} catch (Exception e) { | ||
logger.error("check index failed during fetch seqNo", e); | ||
return UNASSIGNED_SEQ_NO; | ||
} | ||
return seqNo; | ||
} | ||
|
||
private void validateLocalRecoveryState() { | ||
assert Thread.holdsLock(mutex) == false : "recover locally under mutex"; | ||
if (state != IndexShardState.RECOVERING) { | ||
throw new IndexShardNotRecoveringException(shardId, state); | ||
} | ||
recoveryState.validateCurrentStage(RecoveryState.Stage.INDEX); | ||
assert routingEntry().recoverySource().getType() == RecoverySource.Type.PEER : "not a peer recovery [" + routingEntry() + "]"; | ||
} | ||
|
||
public void trimOperationOfPreviousPrimaryTerms(long aboveSeqNo) { | ||
getEngine().translogManager().trimOperationsFromTranslog(getOperationPrimaryTerm(), aboveSeqNo); | ||
} | ||
|
@@ -1998,7 +2042,7 @@ private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier) t | |
private boolean assertSequenceNumbersInCommit() throws IOException { | ||
final Map<String, String> userData = SegmentInfos.readLatestCommit(store.directory()).getUserData(); | ||
assert userData.containsKey(SequenceNumbers.LOCAL_CHECKPOINT_KEY) : "commit point doesn't contains a local checkpoint"; | ||
assert userData.containsKey(SequenceNumbers.MAX_SEQ_NO) : "commit point doesn't contains a maximum sequence number"; | ||
assert userData.containsKey(MAX_SEQ_NO) : "commit point doesn't contains a maximum sequence number"; | ||
assert userData.containsKey(Engine.HISTORY_UUID_KEY) : "commit point doesn't contains a history uuid"; | ||
assert userData.get(Engine.HISTORY_UUID_KEY).equals(getHistoryUUID()) : "commit point history uuid [" | ||
+ userData.get(Engine.HISTORY_UUID_KEY) | ||
|
@@ -3275,6 +3319,10 @@ private boolean isRemoteStoreEnabled() { | |
return (remoteStore != null && shardRouting.primary()); | ||
} | ||
|
||
public boolean isRemoteTranslogEnabled() { | ||
return indexSettings() != null && indexSettings().isRemoteTranslogStoreEnabled(); | ||
} | ||
|
||
ashking94 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/** | ||
* Acquire a primary operation permit whenever the shard is ready for indexing. If a permit is directly available, the provided | ||
* ActionListener will be called on the calling thread. During relocation hand-off, permit acquisition can be delayed. The provided | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: unwanted