Skip to content

Commit

Permalink
Don't schedule SLM jobs when services have been stopped (elastic#48658)
Browse files Browse the repository at this point in the history
This adds a guard for the SLM lifecycle and retention service that
prevents new jobs from being scheduled once the service has been
stopped. Previously, if the node were shut down, the service would be
stopped, but a cluster state change or local master election would still
cause a job to attempt to be scheduled. This could lead to an uncaught
`RejectedExecutionException`.

Resolves elastic#47749
  • Loading branch information
dakrone committed Oct 30, 2019
1 parent 569fb14 commit 217aeaa
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Supplier;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
Expand All @@ -47,6 +48,7 @@ public class SnapshotLifecycleService implements LocalNodeMasterListener, Closea
private final ClusterService clusterService;
private final SnapshotLifecycleTask snapshotTask;
private final Map<String, SchedulerEngine.Job> scheduledTasks = ConcurrentCollections.newConcurrentMap();
private final AtomicBoolean running = new AtomicBoolean(true);
private volatile boolean isMaster = false;

public SnapshotLifecycleService(Settings settings,
Expand Down Expand Up @@ -142,6 +144,10 @@ public void cleanupDeletedPolicies(final ClusterState state) {
* the same version of a policy has already been scheduled it does not overwrite the job.
*/
public void maybeScheduleSnapshot(final SnapshotLifecyclePolicyMetadata snapshotLifecyclePolicy) {
if (this.running.get() == false) {
return;
}

final String jobId = getJobId(snapshotLifecyclePolicy);
final Pattern existingJobPattern = Pattern.compile(snapshotLifecyclePolicy.getPolicy().getId() + JOB_PATTERN_SUFFIX);

Expand Down Expand Up @@ -219,6 +225,8 @@ public String executorName() {

@Override
public void close() {
this.scheduler.stop();
if (this.running.compareAndSet(true, false)) {
this.scheduler.stop();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,13 @@ public void testNothingScheduledWhenNotRunning() {
// Since the service is stopped, jobs should have been cancelled
assertThat(sls.getScheduler().scheduledJobIds(), equalTo(Collections.emptySet()));

// No jobs should be scheduled when service is closed
state = createState(new SnapshotLifecycleMetadata(policies, OperationMode.RUNNING, new SnapshotLifecycleStats()));
sls.close();
sls.onMaster();
sls.clusterChanged(new ClusterChangedEvent("1", state, emptyState));
assertThat(sls.getScheduler().scheduledJobIds(), equalTo(Collections.emptySet()));

threadPool.shutdownNow();
}
}
Expand Down

0 comments on commit 217aeaa

Please sign in to comment.