-
Notifications
You must be signed in to change notification settings - Fork 24.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Replica starts peer recovery with safe commit #28181
Changes from 7 commits
2509d17
9d7db40
c9152ea
2ff5354
d66ba4b
1eea7b1
9b25413
efb5149
2642d7e
3ef8bf5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -150,9 +150,9 @@ public RecoveryResponse recoverToTarget() throws IOException { | |
|
||
final long startingSeqNo; | ||
final long requiredSeqNoRangeStart; | ||
final boolean isSequenceNumberBasedRecoveryPossible = request.startingSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO && | ||
final boolean isSequenceNumberBasedRecovery = request.startingSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO && | ||
isTargetSameHistory() && isTranslogReadyForSequenceNumberBasedRecovery(); | ||
if (isSequenceNumberBasedRecoveryPossible) { | ||
if (isSequenceNumberBasedRecovery) { | ||
logger.trace("performing sequence numbers based recovery. starting at [{}]", request.startingSeqNo()); | ||
startingSeqNo = request.startingSeqNo(); | ||
requiredSeqNoRangeStart = startingSeqNo; | ||
|
@@ -188,7 +188,8 @@ public RecoveryResponse recoverToTarget() throws IOException { | |
runUnderPrimaryPermit(() -> shard.initiateTracking(request.targetAllocationId())); | ||
|
||
try { | ||
prepareTargetForTranslog(translog.estimateTotalOperationsFromMinSeq(startingSeqNo)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we can roll back all these naming changes if we we keep the old message (and the boolean) |
||
// For a sequence based recovery, the target can keep its local translog | ||
prepareTargetForTranslog(isSequenceNumberBasedRecovery == false, translog.estimateTotalOperationsFromMinSeq(startingSeqNo)); | ||
} catch (final Exception e) { | ||
throw new RecoveryEngineException(shard.shardId(), 1, "prepare target for translog failed", e); | ||
} | ||
|
@@ -421,13 +422,13 @@ public void phase1(final IndexCommit snapshot, final Supplier<Integer> translogO | |
} | ||
} | ||
|
||
void prepareTargetForTranslog(final int totalTranslogOps) throws IOException { | ||
void prepareTargetForTranslog(final boolean createNewTranslog, final int totalTranslogOps) throws IOException { | ||
StopWatch stopWatch = new StopWatch().start(); | ||
logger.trace("recovery [phase1]: prepare remote engine for translog"); | ||
final long startEngineStart = stopWatch.totalTime().millis(); | ||
// Send a request preparing the new shard's translog to receive operations. This ensures the shard engine is started and disables | ||
// garbage collection (not the JVM's GC!) of tombstone deletes. | ||
cancellableThreads.executeIO(() -> recoveryTarget.prepareForTranslogOperations(totalTranslogOps)); | ||
cancellableThreads.executeIO(() -> recoveryTarget.prepareForTranslogOperations(createNewTranslog, totalTranslogOps)); | ||
stopWatch.stop(); | ||
|
||
response.startTime = stopWatch.totalTime().millis() - startEngineStart; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -362,10 +362,14 @@ private void ensureRefCount() { | |
/*** Implementation of {@link RecoveryTargetHandler } */ | ||
|
||
@Override | ||
public void prepareForTranslogOperations(int totalTranslogOps) throws IOException { | ||
public void prepareForTranslogOperations(boolean createNewTranslog, int totalTranslogOps) throws IOException { | ||
state().getTranslog().totalOperations(totalTranslogOps); | ||
// TODO: take the local checkpoint from store as global checkpoint, once we know it's safe | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the todo is still relevant no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I pushed it back as a note as it's a valid TODO. |
||
indexShard().openIndexAndCreateTranslog(false, SequenceNumbers.UNASSIGNED_SEQ_NO); | ||
if (createNewTranslog) { | ||
// TODO: take the local checkpoint from store as global checkpoint, once we know it's safe | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please add when you expect it to be safe (what version) |
||
indexShard().openIndexAndCreateTranslog(false, SequenceNumbers.UNASSIGNED_SEQ_NO); | ||
} else { | ||
indexShard().openIndexAndSkipTranslogRecovery(); | ||
} | ||
} | ||
|
||
@Override | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know I came up with the name and I'm sorry for changing my mind. I would propose
createNewTranslog
. better no?