Skip to content

Commit

Permalink
[7.5] Don't schedule SLM jobs when services have been stopped… (#48693)
Browse files Browse the repository at this point in the history
This adds a guard for the SLM lifecycle and retention service that
prevents new jobs from being scheduled once the service has been
stopped. Previous if the node were shut down the service would be
stopped, but a cluster state or local master election would cause a job
to attempt to be scheduled. This could lead to an uncaught
`RejectedExecutionException`.

Resolves #47749
  • Loading branch information
dakrone authored Oct 30, 2019
1 parent 596d7c6 commit ddf04fb
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Supplier;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
Expand All @@ -48,6 +49,7 @@ public class SnapshotLifecycleService implements LocalNodeMasterListener, Closea
private final ClusterService clusterService;
private final SnapshotLifecycleTask snapshotTask;
private final Map<String, SchedulerEngine.Job> scheduledTasks = ConcurrentCollections.newConcurrentMap();
private final AtomicBoolean running = new AtomicBoolean(true);
private volatile boolean isMaster = false;

public SnapshotLifecycleService(Settings settings,
Expand Down Expand Up @@ -160,6 +162,10 @@ public void cleanupDeletedPolicies(final ClusterState state) {
* the same version of a policy has already been scheduled it does not overwrite the job.
*/
public void maybeScheduleSnapshot(final SnapshotLifecyclePolicyMetadata snapshotLifecyclePolicy) {
if (this.running.get() == false) {
return;
}

final String jobId = getJobId(snapshotLifecyclePolicy);
final Pattern existingJobPattern = Pattern.compile(snapshotLifecyclePolicy.getPolicy().getId() + JOB_PATTERN_SUFFIX);

Expand Down Expand Up @@ -237,6 +243,8 @@ public String executorName() {

@Override
public void close() {
this.scheduler.stop();
if (this.running.compareAndSet(true, false)) {
this.scheduler.stop();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.io.Closeable;
import java.time.Clock;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Supplier;

/**
Expand All @@ -38,6 +39,7 @@ public class SnapshotRetentionService implements LocalNodeMasterListener, Closea
private final SchedulerEngine scheduler;
private final SnapshotRetentionTask retentionTask;
private final Clock clock;
private final AtomicBoolean running = new AtomicBoolean(true);

private volatile String slmRetentionSchedule;
private volatile boolean isMaster = false;
Expand Down Expand Up @@ -81,7 +83,7 @@ public void offMaster() {

private void rescheduleRetentionJob() {
final String schedule = this.slmRetentionSchedule;
if (this.isMaster && Strings.hasText(schedule)) {
if (this.running.get() && this.isMaster && Strings.hasText(schedule)) {
final SchedulerEngine.Job retentionJob = new SchedulerEngine.Job(SLM_RETENTION_JOB_ID,
new CronSchedule(schedule));
logger.debug("scheduling SLM retention job for [{}]", schedule);
Expand Down Expand Up @@ -113,6 +115,8 @@ public String executorName() {

@Override
public void close() {
this.scheduler.stop();
if (this.running.compareAndSet(true, false)) {
this.scheduler.stop();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ public void testNothingScheduledWhenNotRunning() {
// Since the service is stopped, jobs should have been cancelled
assertThat(sls.getScheduler().scheduledJobIds(), equalTo(Collections.emptySet()));

// No jobs should be scheduled when service is closed
state = createState(new SnapshotLifecycleMetadata(policies, OperationMode.RUNNING, new SnapshotLifecycleStats()));
sls.close();
sls.onMaster();
sls.clusterChanged(new ClusterChangedEvent("1", state, emptyState));
assertThat(sls.getScheduler().scheduledJobIds(), equalTo(Collections.emptySet()));

threadPool.shutdownNow();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ public void testJobsAreScheduled() {

service.setUpdateSchedule("");
assertThat(service.getScheduler().jobCount(), equalTo(0));

// Service should not scheduled any jobs once closed
service.close();
service.onMaster();
assertThat(service.getScheduler().jobCount(), equalTo(0));

threadPool.shutdownNow();
}
}
Expand Down

0 comments on commit ddf04fb

Please sign in to comment.