Fix SLM check for restore in progress (elastic#50868)
* Fix SLM check for restore in progress

This commit fixes the check in SLM where the `RestoreInProgress`
metadata was only checked for existence. Rather than checking for
existence, we should check whether it actually contains entries, via
its `isEmpty` method. Prior to this fix, a successful restore from a
repository that used SLM retention would prevent SLM retention from
running in all subsequent invocations, because the empty
`RestoreInProgress` left behind in the cluster state made SLM think
that a restore was still running.
dakrone authored and SivagurunathanV committed Jan 21, 2020
1 parent c5d2402 commit c56637d
Showing 3 changed files with 88 additions and 5 deletions.
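The heart of the change is the one-line condition in the first diff below. As a condensed, hypothetical sketch for reference (the class and method names come from that diff; everything else in the real okayToDeleteSnapshots method is elided, and RestoreCheckSketch is an invented wrapper): after a restore completes, a RestoreInProgress custom can remain in the cluster state with no entries left in it, so presence alone is the wrong signal.

import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.RestoreInProgress;

class RestoreCheckSketch {
    // Mirrors the changed condition in SnapshotRetentionTask.okayToDeleteSnapshots.
    static boolean okayToDeleteSnapshots(ClusterState state) {
        final RestoreInProgress restoreInProgress = state.custom(RestoreInProgress.TYPE);
        // Old check: restoreInProgress != null. That treated the empty,
        // left-over RestoreInProgress from a finished restore as "running".
        // New check: only block deletion while restore entries actually exist.
        if (restoreInProgress != null && restoreInProgress.isEmpty() == false) {
            return false;
        }
        return true;
    }
}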
File 1 of 3
@@ -446,7 +446,7 @@ public static boolean okayToDeleteSnapshots(ClusterState state) {
 
         // Cannot delete during a restore
         final RestoreInProgress restoreInProgress = state.custom(RestoreInProgress.TYPE);
-        if (restoreInProgress != null) {
+        if (restoreInProgress != null && restoreInProgress.isEmpty() == false) {
             return false;
         }
 
@@ -480,6 +480,7 @@ public void onNewClusterState(ClusterState state) {
                 logger.debug("retrying SLM snapshot retention deletion after snapshot operation has completed");
                 reRun.accept(state);
             } else {
+                logger.trace("received new cluster state but a snapshot operation is still running");
                 observer.waitForNextChange(this);
             }
         } catch (Exception e) {
File 2 of 3
@@ -8,6 +8,9 @@
 
 import org.elasticsearch.action.ActionFuture;
 import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse;
+import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotAction;
+import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotRequest;
+import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
 import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus;
 import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse;
 import org.elasticsearch.action.index.IndexRequestBuilder;
@@ -22,6 +25,7 @@
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.repositories.RepositoriesService;
 import org.elasticsearch.repositories.RepositoryException;
+import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.snapshots.ConcurrentSnapshotExecutionException;
 import org.elasticsearch.snapshots.SnapshotInfo;
 import org.elasticsearch.snapshots.SnapshotMissingException;
@@ -30,6 +34,7 @@
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
 import org.elasticsearch.xpack.core.ilm.LifecycleSettings;
+import org.elasticsearch.xpack.core.slm.SnapshotInvocationRecord;
 import org.elasticsearch.xpack.core.slm.SnapshotLifecyclePolicy;
 import org.elasticsearch.xpack.core.slm.SnapshotLifecyclePolicyItem;
 import org.elasticsearch.xpack.core.slm.SnapshotRetentionConfiguration;
@@ -385,6 +390,74 @@ private void testUnsuccessfulSnapshotRetention(boolean partialSuccess) throws Exception {
         }
     }
 
+    public void testSLMRetentionAfterRestore() throws Exception {
+        final String indexName = "test";
+        final String policyName = "test-policy";
+        int docCount = 20;
+        for (int i = 0; i < docCount; i++) {
+            index(indexName, i + "", Collections.singletonMap("foo", "bar"));
+        }
+
+        // Create a snapshot repo
+        initializeRepo(REPO);
+
+        logger.info("--> creating policy {}", policyName);
+        createSnapshotPolicy(policyName, "snap", NEVER_EXECUTE_CRON_SCHEDULE, REPO, indexName, true, false,
+            new SnapshotRetentionConfiguration(TimeValue.ZERO, null, null));
+
+        logger.info("--> executing snapshot lifecycle");
+        final String snapshotName = executePolicy(policyName);
+
+        // Check that the executed snapshot shows up in the SLM output
+        assertBusy(() -> {
+            GetSnapshotLifecycleAction.Response getResp =
+                client().execute(GetSnapshotLifecycleAction.INSTANCE, new GetSnapshotLifecycleAction.Request(policyName)).get();
+            logger.info("--> checking for in progress snapshot...");
+
+            assertThat(getResp.getPolicies().size(), greaterThan(0));
+            SnapshotLifecyclePolicyItem item = getResp.getPolicies().get(0);
+            SnapshotInvocationRecord lastSuccess = item.getLastSuccess();
+            assertNotNull(lastSuccess);
+            assertThat(lastSuccess.getSnapshotName(), equalTo(snapshotName));
+        });
+
+        logger.info("--> restoring index");
+        RestoreSnapshotRequest restoreReq = new RestoreSnapshotRequest(REPO, snapshotName);
+        restoreReq.indices(indexName);
+        restoreReq.renamePattern("(.+)");
+        restoreReq.renameReplacement("restored_$1");
+        restoreReq.waitForCompletion(true);
+        RestoreSnapshotResponse resp = client().execute(RestoreSnapshotAction.INSTANCE, restoreReq).get();
+        assertThat(resp.status(), equalTo(RestStatus.OK));
+
+        logger.info("--> executing SLM retention");
+        assertAcked(client().execute(ExecuteSnapshotRetentionAction.INSTANCE, new ExecuteSnapshotRetentionAction.Request()).get());
+        logger.info("--> waiting for {} snapshot to be deleted", snapshotName);
+        assertBusy(() -> {
+            try {
+                try {
+                    GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster()
+                        .prepareGetSnapshots(REPO).setSnapshots(snapshotName).get();
+                    assertThat(snapshotsStatusResponse.getSnapshots(REPO), empty());
+                } catch (SnapshotMissingException e) {
+                    // This is what we want to happen
+                }
+                logger.info("--> snapshot [{}] has been deleted", snapshotName);
+            } catch (RepositoryException re) {
+                // Concurrent status calls and write operations may lead to failures in determining the current repository generation
+                // TODO: Remove this hack once tracking the current repository generation has been made consistent
+                throw new AssertionError(re);
+            }
+        });
+
+        // Cancel/delete the snapshot
+        try {
+            client().admin().cluster().prepareDeleteSnapshot(REPO, snapshotName).get();
+        } catch (SnapshotMissingException e) {
+            // ignore
+        }
+    }
+
     private SnapshotsStatusResponse getSnapshotStatus(String snapshotName) {
         try {
             return client().admin().cluster().prepareSnapshotStatus(REPO).setSnapshots(snapshotName).get();
File 3 of 3
@@ -67,6 +67,7 @@
 import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.not;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
 public class SnapshotRetentionTaskTests extends ESTestCase {
 
@@ -363,6 +364,14 @@ public void testOkToDeleteSnapshots() {
             .build();
 
         assertThat(SnapshotRetentionTask.okayToDeleteSnapshots(state), equalTo(false));
+
+        restoreInProgress = mock(RestoreInProgress.class);
+        when(restoreInProgress.isEmpty()).thenReturn(true);
+        state = ClusterState.builder(new ClusterName("cluster"))
+            .putCustom(RestoreInProgress.TYPE, restoreInProgress)
+            .build();
+
+        assertThat(SnapshotRetentionTask.okayToDeleteSnapshots(state), equalTo(true));
     }
 
     public void testSkipWhileStopping() throws Exception {
@@ -420,10 +429,10 @@ private void doTestRunManuallyDuringMode(OperationMode mode) throws Exception {
         final String repoId = "repo";
         SnapshotLifecyclePolicy policy = new SnapshotLifecyclePolicy(policyId, "snap", "1 * * * * ?",
             repoId, null, new SnapshotRetentionConfiguration(TimeValue.timeValueDays(30), null, null));
-
+
         ClusterState state = createState(mode, policy);
         ClusterServiceUtils.setState(clusterService, state);
-
+
         AtomicBoolean retentionWasRun = new AtomicBoolean(false);
         MockSnapshotRetentionTask task = new MockSnapshotRetentionTask(noOpClient, clusterService,
             new SnapshotLifecycleTaskTests.VerifyingHistoryStore(noOpClient, ZoneOffset.UTC, (historyItem) -> {
@@ -436,10 +445,10 @@ private void doTestRunManuallyDuringMode(OperationMode mode) throws Exception {
                 (deletionPolicyId, repo, snapId, slmStats, listener) -> {
                 },
                 System::nanoTime);
-
+
            long time = System.currentTimeMillis();
            task.triggered(new SchedulerEngine.Event(SnapshotRetentionService.SLM_RETENTION_MANUAL_JOB_ID, time, time));
-
+
            assertTrue("retention should be run manually even if SLM is disabled", retentionWasRun.get());
         } finally {
             threadPool.shutdownNow();
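For reference, the unit test above builds the empty-restore cluster state with a Mockito mock. A sketch of the same state using the real builder API instead (assuming the RestoreInProgress.Builder available on this branch; EmptyRestoreStateSketch is an invented wrapper) looks like this:

import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.RestoreInProgress;

class EmptyRestoreStateSketch {
    // A restore that has fully completed leaves this shape of state behind:
    // the RestoreInProgress custom is present but holds no entries.
    static ClusterState stateAfterCompletedRestore() {
        RestoreInProgress empty = new RestoreInProgress.Builder().build();
        assert empty.isEmpty();
        return ClusterState.builder(new ClusterName("cluster"))
            .putCustom(RestoreInProgress.TYPE, empty)
            .build();
    }
}

With the fix applied, okayToDeleteSnapshots returns true for this state; before the fix it returned false.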
