From 1566cfaec9fab2c00d35fa47d7cbcbd5d4120060 Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Thu, 26 Apr 2018 14:52:58 -0400 Subject: [PATCH 01/37] Pull in HTTP2-capable client. --- SingularityService/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/SingularityService/pom.xml b/SingularityService/pom.xml index 106295ba81..751e0b0974 100644 --- a/SingularityService/pom.xml +++ b/SingularityService/pom.xml @@ -189,6 +189,12 @@ async-http-client + + com.squareup.okhttp3 + okhttp + 3.10.0 + + io.dropwizard dropwizard-jdbi From b17afada2f7604470721c41a04752683ee76423b Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Thu, 26 Apr 2018 14:53:35 -0400 Subject: [PATCH 02/37] Add & document new protocol options. --- .../src/main/java/com/hubspot/deploy/HealthcheckOptions.java | 2 +- .../main/java/com/hubspot/singularity/HealthcheckProtocol.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptions.java b/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptions.java index 292588e1bc..d56998bb05 100644 --- a/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptions.java +++ b/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptions.java @@ -74,7 +74,7 @@ public Optional getPortNumber() { return portNumber; } - @ApiModelProperty(required=false, value="Healthcheck protocol - HTTP or HTTPS") + @ApiModelProperty(required=false, value="Healthcheck protocol - HTTP or HTTPS for HTTP/1, HTTP2 or HTTPS2 for HTTP/2") public Optional getProtocol() { return protocol; } diff --git a/SingularityBase/src/main/java/com/hubspot/singularity/HealthcheckProtocol.java b/SingularityBase/src/main/java/com/hubspot/singularity/HealthcheckProtocol.java index 71bc88fcfb..f84050a0cb 100644 --- a/SingularityBase/src/main/java/com/hubspot/singularity/HealthcheckProtocol.java +++ b/SingularityBase/src/main/java/com/hubspot/singularity/HealthcheckProtocol.java @@ -2,7 +2,7 @@ public enum HealthcheckProtocol { - HTTP("http"), HTTPS("https"); + HTTP("http"), HTTPS("https"), HTTP2("http"), HTTPS2("https"); private final String protocol; From 9ab089a26a185c748f708edbe4d97d393a1b1b36 Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Thu, 26 Apr 2018 14:57:43 -0400 Subject: [PATCH 03/37] Bind client. --- .../java/com/hubspot/singularity/SingularityMainModule.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/SingularityMainModule.java b/SingularityService/src/main/java/com/hubspot/singularity/SingularityMainModule.java index 915b01b054..dcfc29e5b7 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/SingularityMainModule.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/SingularityMainModule.java @@ -84,6 +84,7 @@ import io.dropwizard.jetty.HttpsConnectorFactory; import io.dropwizard.server.DefaultServerFactory; import io.dropwizard.server.SimpleServerFactory; +import okhttp3.OkHttpClient; public class SingularityMainModule implements Module { @@ -157,6 +158,7 @@ public void configure(Binder binder) { binder.bind(MetricRegistry.class).toProvider(DropwizardMetricRegistryProvider.class).in(Scopes.SINGLETON); binder.bind(AsyncHttpClient.class).to(SingularityAsyncHttpClient.class).in(Scopes.SINGLETON); + binder.bind(OkHttpClient.class).in(Scopes.SINGLETON); binder.bind(ServerProvider.class).in(Scopes.SINGLETON); binder.bind(SingularityDropwizardHealthcheck.class).in(Scopes.SINGLETON); From 2c5577164cfcd0a5319184563fb93f0e6f175746 Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Thu, 26 Apr 2018 14:58:51 -0400 Subject: [PATCH 04/37] Don't directly implement Ning-specific handler. --- .../SingularityHealthcheckAsyncHandler.java | 22 ++++--------------- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java index e310935e6f..da17fdd93b 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java @@ -13,10 +13,8 @@ import com.hubspot.singularity.config.SingularityConfiguration; import com.hubspot.singularity.data.TaskManager; import com.hubspot.singularity.sentry.SingularityExceptionNotifier; -import com.ning.http.client.AsyncCompletionHandler; -import com.ning.http.client.Response; -public class SingularityHealthcheckAsyncHandler extends AsyncCompletionHandler { +public class SingularityHealthcheckAsyncHandler { private static final Logger LOG = LoggerFactory.getLogger(SingularityHealthchecker.class); @@ -26,7 +24,6 @@ public class SingularityHealthcheckAsyncHandler extends AsyncCompletionHandler failureStatusCodes; public SingularityHealthcheckAsyncHandler(SingularityExceptionNotifier exceptionNotifier, SingularityConfiguration configuration, SingularityHealthchecker healthchecker, @@ -36,7 +33,6 @@ public SingularityHealthcheckAsyncHandler(SingularityExceptionNotifier exception this.newTaskChecker = newTaskChecker; this.healthchecker = healthchecker; this.task = task; - this.maxHealthcheckResponseBodyBytes = configuration.getMaxHealthcheckResponseBodyBytes(); this.failureStatusCodes = task.getTaskRequest().getDeploy().getHealthcheck().isPresent() ? task.getTaskRequest().getDeploy().getHealthcheck().get().getFailureStatusCodes().or(configuration.getHealthcheckFailureStatusCodes()) : configuration.getHealthcheckFailureStatusCodes(); @@ -44,21 +40,11 @@ public SingularityHealthcheckAsyncHandler(SingularityExceptionNotifier exception startTime = System.currentTimeMillis(); } - @Override - public Response onCompleted(Response response) throws Exception { - Optional responseBody = Optional.absent(); - - if (response.hasResponseBody()) { - responseBody = Optional.of(response.getResponseBodyExcerpt(maxHealthcheckResponseBodyBytes)); - } - - saveResult(Optional.of(response.getStatusCode()), responseBody, Optional. absent(), Optional.absent()); - - return response; + public void onCompleted(Optional statusCode, Optional responseBodyExcerpt) { + saveResult(statusCode, responseBodyExcerpt, Optional. absent(), Optional.absent()); } - @Override - public void onThrowable(Throwable t) { + public void onFailed(Throwable t) { LOG.trace("Exception while making health check for task {}", task.getTaskId(), t); saveResult(Optional. absent(), Optional. absent(), Optional.of(String.format("Healthcheck failed due to exception: %s", t.getMessage())), Optional.of(t)); From cb64ac29746ebfce018e87f88f9faa425a86d2ef Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Thu, 26 Apr 2018 14:59:36 -0400 Subject: [PATCH 05/37] Wrap handler & perform healthcheck using appropriate client. --- .../scheduler/SingularityHealthchecker.java | 84 ++++++++++++++++--- 1 file changed, 73 insertions(+), 11 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java index 2f2853fa7c..df3981d75f 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java @@ -1,5 +1,6 @@ package com.hubspot.singularity.scheduler; +import java.io.IOException; import java.util.Collection; import java.util.Map; import java.util.concurrent.ScheduledExecutorService; @@ -20,8 +21,6 @@ import com.google.inject.Inject; import com.google.inject.name.Named; import com.hubspot.deploy.HealthcheckOptions; -import com.hubspot.singularity.helpers.MesosProtosUtils; -import com.hubspot.singularity.helpers.MesosUtils; import com.hubspot.singularity.ExtendedTaskState; import com.hubspot.singularity.HealthcheckProtocol; import com.hubspot.singularity.SingularityAbort; @@ -36,11 +35,18 @@ import com.hubspot.singularity.config.SingularityConfiguration; import com.hubspot.singularity.data.DisasterManager; import com.hubspot.singularity.data.TaskManager; +import com.hubspot.singularity.helpers.MesosProtosUtils; +import com.hubspot.singularity.helpers.MesosUtils; import com.hubspot.singularity.sentry.SingularityExceptionNotifier; +import com.ning.http.client.AsyncCompletionHandler; import com.ning.http.client.AsyncHttpClient; import com.ning.http.client.PerRequestConfig; import com.ning.http.client.RequestBuilder; +import okhttp3.Call; +import okhttp3.Callback; +import okhttp3.OkHttpClient; + @Singleton public class SingularityHealthchecker { private static final HealthcheckProtocol DEFAULT_HEALTH_CHECK_SCHEME = HealthcheckProtocol.HTTP; @@ -48,6 +54,7 @@ public class SingularityHealthchecker { private static final Logger LOG = LoggerFactory.getLogger(SingularityHealthchecker.class); private final AsyncHttpClient http; + private final OkHttpClient http2; private final SingularityConfiguration configuration; private final TaskManager taskManager; private final SingularityAbort abort; @@ -63,10 +70,11 @@ public class SingularityHealthchecker { @Inject public SingularityHealthchecker(@Named(SingularityMainModule.HEALTHCHECK_THREADPOOL_NAME) ScheduledExecutorService executorService, - AsyncHttpClient http, SingularityConfiguration configuration, SingularityNewTaskChecker newTaskChecker, + AsyncHttpClient http, OkHttpClient http2, SingularityConfiguration configuration, SingularityNewTaskChecker newTaskChecker, TaskManager taskManager, SingularityAbort abort, SingularityExceptionNotifier exceptionNotifier, DisasterManager disasterManager, MesosProtosUtils mesosProtosUtils) { this.http = http; + this.http2 = http2; this.configuration = configuration; this.newTaskChecker = newTaskChecker; this.taskManager = taskManager; @@ -270,6 +278,49 @@ private boolean shouldHealthcheck(final SingularityTask task, final Optional maybeResponseExcerpt = Optional.absent(); + + String responseExcerpt = response.peekBody(configuration.getMaxHealthcheckResponseBodyBytes()).string(); + if (responseExcerpt.length() > 0) { + maybeResponseExcerpt = Optional.of(responseExcerpt); + } + + handler.onCompleted(Optional.of(response.code()), maybeResponseExcerpt); + } + }; + } + + private AsyncCompletionHandler wrappedHttp1Handler(final SingularityHealthcheckAsyncHandler handler) { + return new AsyncCompletionHandler() { + @Override + public void onThrowable(Throwable t) { + handler.onFailed(t); + } + + @Override + public com.ning.http.client.Response onCompleted(com.ning.http.client.Response response) throws Exception { + Optional maybeResponseExcerpt = Optional.absent(); + + if (response.hasResponseBody()) { + maybeResponseExcerpt = Optional.of(response.getResponseBodyExcerpt(configuration.getMaxHealthcheckResponseBodyBytes())); + } + + handler.onCompleted(Optional.of(response.getStatusCode()), maybeResponseExcerpt); + + return response; + } + }; + } + private void asyncHealthcheck(final SingularityTask task) { final SingularityHealthcheckAsyncHandler handler = new SingularityHealthcheckAsyncHandler(exceptionNotifier, configuration, this, newTaskChecker, taskManager, task); final Optional uri = getHealthcheckUri(task); @@ -283,17 +334,28 @@ private void asyncHealthcheck(final SingularityTask task) { task.getTaskRequest().getDeploy().getHealthcheck().get().getResponseTimeoutSeconds().or(configuration.getHealthcheckTimeoutSeconds()) : configuration.getHealthcheckTimeoutSeconds(); try { - PerRequestConfig prc = new PerRequestConfig(); - prc.setRequestTimeoutInMs((int) TimeUnit.SECONDS.toMillis(timeoutSeconds)); - - RequestBuilder builder = new RequestBuilder("GET"); - builder.setFollowRedirects(true); - builder.setUrl(uri.get()); - builder.setPerRequestConfig(prc); + HealthcheckProtocol protocol = task.getTaskRequest().getDeploy().getHealthcheck().get().getProtocol().or(HealthcheckProtocol.HTTP); LOG.trace("Issuing a healthcheck ({}) for task {} with timeout {}s", uri.get(), task.getTaskId(), timeoutSeconds); - http.prepareRequest(builder.build()).execute(handler); + if (protocol == HealthcheckProtocol.HTTP2 || protocol == HealthcheckProtocol.HTTPS2) { + http2.newCall( + new okhttp3.Request.Builder() + .method("GET", null) + .url(uri.get()) + .build() + ).enqueue(wrappedHttp2Handler(handler)); + } else { + PerRequestConfig prc = new PerRequestConfig(); + prc.setRequestTimeoutInMs((int) TimeUnit.SECONDS.toMillis(timeoutSeconds)); + + RequestBuilder builder = new RequestBuilder("GET"); + builder.setFollowRedirects(true); + builder.setUrl(uri.get()); + builder.setPerRequestConfig(prc); + + http.prepareRequest(builder.build()).execute(wrappedHttp1Handler(handler)); + } } catch (Throwable t) { LOG.debug("Exception while preparing healthcheck ({}) for task ({})", uri, task.getTaskId(), t); exceptionNotifier.notify(String.format("Error preparing healthcheck (%s)", t.getMessage()), t, ImmutableMap.of("taskId", task.getTaskId().toString())); From 8435dda5ba59749c8cdc343cad5352176b8f8a01 Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Sat, 12 May 2018 18:19:11 -0400 Subject: [PATCH 06/37] Manage this dependency here until it's moved to basepom. --- SingularityService/pom.xml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/SingularityService/pom.xml b/SingularityService/pom.xml index 751e0b0974..5ff1ec22b7 100644 --- a/SingularityService/pom.xml +++ b/SingularityService/pom.xml @@ -14,6 +14,16 @@ com.hubspot.singularity.SingularityService + + + + com.squareup.okhttp3 + okhttp + 3.10.0 + + + + @@ -192,7 +202,6 @@ com.squareup.okhttp3 okhttp - 3.10.0 From 606244246fbaacb3cb2dfcd7c9891d11fea73b08 Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Sat, 12 May 2018 18:19:49 -0400 Subject: [PATCH 07/37] Add managed client which closes underlying resources at shutdown. --- .../singularity/SingularityOkHttpClient.java | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 SingularityService/src/main/java/com/hubspot/singularity/SingularityOkHttpClient.java diff --git a/SingularityService/src/main/java/com/hubspot/singularity/SingularityOkHttpClient.java b/SingularityService/src/main/java/com/hubspot/singularity/SingularityOkHttpClient.java new file mode 100644 index 0000000000..9e3721e763 --- /dev/null +++ b/SingularityService/src/main/java/com/hubspot/singularity/SingularityOkHttpClient.java @@ -0,0 +1,34 @@ +package com.hubspot.singularity; + +import java.io.IOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.inject.Inject; + +import io.dropwizard.lifecycle.Managed; +import okhttp3.OkHttpClient; + +public class SingularityOkHttpClient extends OkHttpClient implements Managed { + private static final Logger LOG = LoggerFactory.getLogger(SingularityOkHttpClient.class); + + @Inject + SingularityOkHttpClient() {} + + @Override + public void start() {} + + @Override + public void stop() { + dispatcher().executorService().shutdown(); + connectionPool().evictAll(); + if (cache() != null) { + try { + cache().delete(); + } catch (IOException e) { + LOG.warn("Unable to clean up client cache!"); + } + } + } +} From 4de7bbec8755c49e9873d334d6755060a9126e8f Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Sat, 12 May 2018 18:20:03 -0400 Subject: [PATCH 08/37] Bind managed client. --- .../java/com/hubspot/singularity/SingularityMainModule.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/SingularityMainModule.java b/SingularityService/src/main/java/com/hubspot/singularity/SingularityMainModule.java index dcfc29e5b7..5da44af37b 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/SingularityMainModule.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/SingularityMainModule.java @@ -158,7 +158,7 @@ public void configure(Binder binder) { binder.bind(MetricRegistry.class).toProvider(DropwizardMetricRegistryProvider.class).in(Scopes.SINGLETON); binder.bind(AsyncHttpClient.class).to(SingularityAsyncHttpClient.class).in(Scopes.SINGLETON); - binder.bind(OkHttpClient.class).in(Scopes.SINGLETON); + binder.bind(OkHttpClient.class).to(SingularityOkHttpClient.class).in(Scopes.SINGLETON); binder.bind(ServerProvider.class).in(Scopes.SINGLETON); binder.bind(SingularityDropwizardHealthcheck.class).in(Scopes.SINGLETON); From 9e055665dae4eb343ee9e539259139f11de1f945 Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Sat, 12 May 2018 18:20:37 -0400 Subject: [PATCH 09/37] Configure client before calling healthcheck. --- .../scheduler/SingularityHealthchecker.java | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java index df3981d75f..82d00a9b42 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java @@ -339,12 +339,20 @@ private void asyncHealthcheck(final SingularityTask task) { LOG.trace("Issuing a healthcheck ({}) for task {} with timeout {}s", uri.get(), task.getTaskId(), timeoutSeconds); if (protocol == HealthcheckProtocol.HTTP2 || protocol == HealthcheckProtocol.HTTPS2) { - http2.newCall( - new okhttp3.Request.Builder() - .method("GET", null) - .url(uri.get()) - .build() - ).enqueue(wrappedHttp2Handler(handler)); + // Creates a lightweight new client which shares the underlying resource pools of the original instance. + http2.newBuilder() + .retryOnConnectionFailure(false) + .followRedirects(true) + .connectTimeout(timeoutSeconds, TimeUnit.SECONDS) + .readTimeout(timeoutSeconds, TimeUnit.SECONDS) + .cache(null) + .build() + .newCall( + new okhttp3.Request.Builder() + .method("GET", null) + .url(uri.get()) + .build() + ).enqueue(wrappedHttp2Handler(handler)); } else { PerRequestConfig prc = new PerRequestConfig(); prc.setRequestTimeoutInMs((int) TimeUnit.SECONDS.toMillis(timeoutSeconds)); From 2e968996f8b11b56b83eceab41c3f3ea295df7bb Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Sat, 12 May 2018 18:49:01 -0400 Subject: [PATCH 10/37] Add enum for supported healthcheck methods. --- .../hubspot/singularity/HealthcheckMethod.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 SingularityBase/src/main/java/com/hubspot/singularity/HealthcheckMethod.java diff --git a/SingularityBase/src/main/java/com/hubspot/singularity/HealthcheckMethod.java b/SingularityBase/src/main/java/com/hubspot/singularity/HealthcheckMethod.java new file mode 100644 index 0000000000..7a3adc81ef --- /dev/null +++ b/SingularityBase/src/main/java/com/hubspot/singularity/HealthcheckMethod.java @@ -0,0 +1,16 @@ +package com.hubspot.singularity; + +public enum HealthcheckMethod { + + GET("GET"), POST("POST"); + + private String method; + + private HealthcheckMethod(String method) { + this.method = method; + } + + public String getMethod() { + return method; + } +} From 8c5f0d0d86c8deadf01c91baa8af74fd13bfac4d Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Sat, 12 May 2018 18:49:36 -0400 Subject: [PATCH 11/37] Wire up healthcheck method in options object. --- .../hubspot/deploy/HealthcheckOptions.java | 28 +++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptions.java b/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptions.java index d56998bb05..4abab34003 100644 --- a/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptions.java +++ b/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptions.java @@ -9,6 +9,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Optional; +import com.hubspot.singularity.HealthcheckMethod; import com.hubspot.singularity.HealthcheckProtocol; import com.wordnik.swagger.annotations.ApiModelProperty; @@ -18,6 +19,7 @@ public class HealthcheckOptions { private final Optional portIndex; private final Optional portNumber; private final Optional protocol; + private final Optional method; private final Optional startupTimeoutSeconds; private final Optional startupDelaySeconds; private final Optional startupIntervalSeconds; @@ -27,14 +29,23 @@ public class HealthcheckOptions { private final Optional> failureStatusCodes; @JsonCreator - public HealthcheckOptions(@JsonProperty("uri") String uri, @JsonProperty("portIndex") Optional portIndex, @JsonProperty("portNumber") Optional portNumber, @JsonProperty("protocol") Optional protocol, - @JsonProperty("startupTimeoutSeconds") Optional startupTimeoutSeconds, @JsonProperty("startupDelaySeconds") Optional startupDelaySeconds, @JsonProperty("startupIntervalSeconds") Optional startupIntervalSeconds, - @JsonProperty("intervalSeconds") Optional intervalSeconds, @JsonProperty("responseTimeoutSeconds") Optional responseTimeoutSeconds, @JsonProperty("maxRetries") Optional maxRetries, - @JsonProperty("failureStatusCodes") Optional> failureStatusCodes) { + public HealthcheckOptions(@JsonProperty("uri") String uri, + @JsonProperty("portIndex") Optional portIndex, + @JsonProperty("portNumber") Optional portNumber, + @JsonProperty("protocol") Optional protocol, + @JsonProperty("method") Optional method, + @JsonProperty("startupTimeoutSeconds") Optional startupTimeoutSeconds, + @JsonProperty("startupDelaySeconds") Optional startupDelaySeconds, + @JsonProperty("startupIntervalSeconds") Optional startupIntervalSeconds, + @JsonProperty("intervalSeconds") Optional intervalSeconds, + @JsonProperty("responseTimeoutSeconds") Optional responseTimeoutSeconds, + @JsonProperty("maxRetries") Optional maxRetries, + @JsonProperty("failureStatusCodes") Optional> failureStatusCodes) { this.uri = uri; this.portIndex = portIndex; this.portNumber = portNumber; this.protocol = protocol; + this.method = method; this.startupTimeoutSeconds = startupTimeoutSeconds; this.startupDelaySeconds = startupDelaySeconds; this.startupIntervalSeconds = startupIntervalSeconds; @@ -50,6 +61,7 @@ public HealthcheckOptionsBuilder toBuilder() { .setPortIndex(portIndex) .setPortNumber(portNumber) .setProtocol(protocol) + .setMethod(method) .setStartupTimeoutSeconds(startupTimeoutSeconds) .setStartupDelaySeconds(startupDelaySeconds) .setStartupIntervalSeconds(startupIntervalSeconds) @@ -79,6 +91,11 @@ public Optional getProtocol() { return protocol; } + @ApiModelProperty(required=false, value="Healthcheck HTTP method - GET or POST, GET by default") + public Optional getMethod() { + return method; + } + @ApiModelProperty(required=false, value="Consider the task unhealthy/failed if the app has not started responding to healthchecks in this amount of time") public Optional getStartupTimeoutSeconds() { return startupTimeoutSeconds; @@ -138,7 +155,7 @@ public boolean equals(Object o) { @Override public int hashCode() { - return Objects.hash(uri, portIndex, portNumber, protocol, startupTimeoutSeconds, startupDelaySeconds, startupIntervalSeconds, intervalSeconds, responseTimeoutSeconds, maxRetries, failureStatusCodes); + return Objects.hash(uri, portIndex, portNumber, protocol, method, startupTimeoutSeconds, startupDelaySeconds, startupIntervalSeconds, intervalSeconds, responseTimeoutSeconds, maxRetries, failureStatusCodes); } @Override @@ -148,6 +165,7 @@ public String toString() { ", portIndex=" + portIndex + ", portNumber=" + portNumber + ", protocol=" + protocol + + ", method=" + method + ", startupTimeoutSeconds=" + startupTimeoutSeconds + ", startupDelaySeconds=" + startupDelaySeconds + ", startupIntervalSeconds=" + startupIntervalSeconds + From 260df3646ffe7f1b3ee3f98a969db6406d970702 Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Sat, 12 May 2018 18:49:59 -0400 Subject: [PATCH 12/37] Add new healthcheck parameter to builder. --- .../deploy/HealthcheckOptionsBuilder.java | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptionsBuilder.java b/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptionsBuilder.java index 8e439acbac..2b0ac3c0fc 100644 --- a/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptionsBuilder.java +++ b/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptionsBuilder.java @@ -6,6 +6,7 @@ import javax.validation.constraints.NotNull; import com.google.common.base.Optional; +import com.hubspot.singularity.HealthcheckMethod; import com.hubspot.singularity.HealthcheckProtocol; public class HealthcheckOptionsBuilder { @@ -14,6 +15,7 @@ public class HealthcheckOptionsBuilder { private Optional portIndex; private Optional portNumber; private Optional protocol; + private Optional method; private Optional startupTimeoutSeconds; private Optional startupDelaySeconds; private Optional startupIntervalSeconds; @@ -27,6 +29,7 @@ public HealthcheckOptionsBuilder(String uri) { this.portIndex = Optional.absent(); this.portNumber = Optional.absent(); this.protocol = Optional.absent(); + this.method = Optional.absent(); this.startupTimeoutSeconds = Optional.absent(); this.startupDelaySeconds = Optional.absent(); this.startupIntervalSeconds = Optional.absent(); @@ -72,6 +75,15 @@ public HealthcheckOptionsBuilder setProtocol(Optional proto return this; } + public Optional getMethod() { + return method; + } + + public HealthcheckOptionsBuilder setMethod(Optional method) { + this.method = method; + return this; + } + public Optional getStartupTimeoutSeconds() { return startupTimeoutSeconds; } @@ -136,7 +148,7 @@ public HealthcheckOptionsBuilder setFailureStatusCodes(Optional> f } public HealthcheckOptions build() { - return new HealthcheckOptions(uri, portIndex, portNumber, protocol, startupTimeoutSeconds, startupDelaySeconds, startupIntervalSeconds, intervalSeconds, responseTimeoutSeconds, maxRetries, failureStatusCodes); + return new HealthcheckOptions(uri, portIndex, portNumber, protocol, method, startupTimeoutSeconds, startupDelaySeconds, startupIntervalSeconds, intervalSeconds, responseTimeoutSeconds, maxRetries, failureStatusCodes); } @Override @@ -163,7 +175,7 @@ public boolean equals(Object o) { @Override public int hashCode() { - return Objects.hash(uri, portIndex, portNumber, protocol, startupTimeoutSeconds, startupDelaySeconds, startupIntervalSeconds, intervalSeconds, responseTimeoutSeconds, maxRetries, failureStatusCodes); + return Objects.hash(uri, portIndex, portNumber, protocol, method, startupTimeoutSeconds, startupDelaySeconds, startupIntervalSeconds, intervalSeconds, responseTimeoutSeconds, maxRetries, failureStatusCodes); } @Override @@ -173,6 +185,7 @@ public String toString() { ", portIndex=" + portIndex + ", portNumber=" + portNumber + ", protocol=" + protocol + + ", method=" + method + ", startupTimeoutSeconds=" + startupTimeoutSeconds + ", startupDelaySeconds=" + startupDelaySeconds + ", startupIntervalSeconds=" + startupIntervalSeconds + From da2293859a88ff42f27153676094e8743f463eb0 Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Sat, 12 May 2018 18:50:48 -0400 Subject: [PATCH 13/37] Respect configured healthcheck method. --- .../scheduler/SingularityHealthchecker.java | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java index 82d00a9b42..ebf661a5e0 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java @@ -22,6 +22,7 @@ import com.google.inject.name.Named; import com.hubspot.deploy.HealthcheckOptions; import com.hubspot.singularity.ExtendedTaskState; +import com.hubspot.singularity.HealthcheckMethod; import com.hubspot.singularity.HealthcheckProtocol; import com.hubspot.singularity.SingularityAbort; import com.hubspot.singularity.SingularityAction; @@ -330,8 +331,18 @@ private void asyncHealthcheck(final SingularityTask task) { return; } - final Integer timeoutSeconds = task.getTaskRequest().getDeploy().getHealthcheck().isPresent() ? - task.getTaskRequest().getDeploy().getHealthcheck().get().getResponseTimeoutSeconds().or(configuration.getHealthcheckTimeoutSeconds()) : configuration.getHealthcheckTimeoutSeconds(); + final Integer timeoutSeconds; + final String method; + + if (task.getTaskRequest().getDeploy().getHealthcheck().isPresent()) { + HealthcheckOptions options = task.getTaskRequest().getDeploy().getHealthcheck().get(); + + method = options.getMethod().or(HealthcheckMethod.GET).getMethod(); + timeoutSeconds = options.getResponseTimeoutSeconds().or(configuration.getHealthcheckTimeoutSeconds()); + } else { + timeoutSeconds = configuration.getHealthcheckTimeoutSeconds(); + method = HealthcheckMethod.GET.getMethod(); + } try { HealthcheckProtocol protocol = task.getTaskRequest().getDeploy().getHealthcheck().get().getProtocol().or(HealthcheckProtocol.HTTP); @@ -349,7 +360,7 @@ private void asyncHealthcheck(final SingularityTask task) { .build() .newCall( new okhttp3.Request.Builder() - .method("GET", null) + .method(method, null) .url(uri.get()) .build() ).enqueue(wrappedHttp2Handler(handler)); @@ -357,7 +368,7 @@ private void asyncHealthcheck(final SingularityTask task) { PerRequestConfig prc = new PerRequestConfig(); prc.setRequestTimeoutInMs((int) TimeUnit.SECONDS.toMillis(timeoutSeconds)); - RequestBuilder builder = new RequestBuilder("GET"); + RequestBuilder builder = new RequestBuilder(method); builder.setFollowRedirects(true); builder.setUrl(uri.get()); builder.setPerRequestConfig(prc); From 2a54acb6f1964f7f5b0d9005ae167b99fd47e3e8 Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Sat, 12 May 2018 18:51:40 -0400 Subject: [PATCH 14/37] Satisfy this constructor via deprecated deploy healthcheck options config. --- .../src/main/java/com/hubspot/singularity/SingularityDeploy.java | 1 + 1 file changed, 1 insertion(+) diff --git a/SingularityBase/src/main/java/com/hubspot/singularity/SingularityDeploy.java b/SingularityBase/src/main/java/com/hubspot/singularity/SingularityDeploy.java index ef66e78494..d4e8ed9fca 100644 --- a/SingularityBase/src/main/java/com/hubspot/singularity/SingularityDeploy.java +++ b/SingularityBase/src/main/java/com/hubspot/singularity/SingularityDeploy.java @@ -212,6 +212,7 @@ public SingularityDeploy(@JsonProperty("requestId") String requestId, healthcheckPortIndex, Optional.absent(), healthcheckProtocol, + Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), From 9b15faf9cbcf8af2484dbdc363f819a821d6c026 Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Mon, 14 May 2018 15:35:31 -0400 Subject: [PATCH 15/37] Update equals() --- .../src/main/java/com/hubspot/deploy/HealthcheckOptions.java | 1 + .../main/java/com/hubspot/deploy/HealthcheckOptionsBuilder.java | 1 + 2 files changed, 2 insertions(+) diff --git a/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptions.java b/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptions.java index 4abab34003..a5f076f0f6 100644 --- a/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptions.java +++ b/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptions.java @@ -144,6 +144,7 @@ public boolean equals(Object o) { Objects.equals(portIndex, that.portIndex) && Objects.equals(portNumber, that.portNumber) && Objects.equals(protocol, that.protocol) && + Objects.equals(method, that.method) && Objects.equals(startupTimeoutSeconds, that.startupTimeoutSeconds) && Objects.equals(startupDelaySeconds, that.startupDelaySeconds) && Objects.equals(startupIntervalSeconds, that.startupIntervalSeconds) && diff --git a/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptionsBuilder.java b/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptionsBuilder.java index 2b0ac3c0fc..3656ab9f50 100644 --- a/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptionsBuilder.java +++ b/SingularityBase/src/main/java/com/hubspot/deploy/HealthcheckOptionsBuilder.java @@ -164,6 +164,7 @@ public boolean equals(Object o) { Objects.equals(portIndex, that.portIndex) && Objects.equals(portNumber, that.portNumber) && Objects.equals(protocol, that.protocol) && + Objects.equals(method, that.method) && Objects.equals(startupTimeoutSeconds, that.startupTimeoutSeconds) && Objects.equals(startupDelaySeconds, that.startupDelaySeconds) && Objects.equals(startupIntervalSeconds, that.startupIntervalSeconds) && From f8ae2e3407ee95e532becb247a7d82bd7b730d76 Mon Sep 17 00:00:00 2001 From: Gowtam Lal Date: Mon, 11 Jun 2018 17:52:10 -0400 Subject: [PATCH 16/37] UI support. --- .../newDeployForm/NewDeployForm.jsx | 21 +++++++++++++++++-- .../app/components/newDeployForm/fields.es6 | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/SingularityUI/app/components/newDeployForm/NewDeployForm.jsx b/SingularityUI/app/components/newDeployForm/NewDeployForm.jsx index 595c67daf4..948e322052 100644 --- a/SingularityUI/app/components/newDeployForm/NewDeployForm.jsx +++ b/SingularityUI/app/components/newDeployForm/NewDeployForm.jsx @@ -1330,7 +1330,21 @@ class NewDeployForm extends Component { onChange={newValue => this.updateField('protocol', newValue.value)} options={[ { label: 'HTTP', value: 'HTTP' }, - { label: 'HTTPS', value: 'HTTPS' } + { label: 'HTTPS', value: 'HTTPS' }, + { label: 'HTTP2', value: 'HTTP2' }, + { label: 'HTTPS2', value: 'HTTPS2' } + ]} + /> + ); + const healthCheckMethod = ( + this.updateField('method', newValue.value)} + options={[ + { label: 'GET', value: 'GET' }, + { label: 'POST', value: 'POST' } ]} /> ); @@ -1565,9 +1579,12 @@ class NewDeployForm extends Component {
-
+
{healthCheckProtocol}
+
+ {healthCheckMethod} +
{healthcheckStartupDelaySeconds}
diff --git a/SingularityUI/app/components/newDeployForm/fields.es6 b/SingularityUI/app/components/newDeployForm/fields.es6 index f44f067e8f..8b7f7fe653 100644 --- a/SingularityUI/app/components/newDeployForm/fields.es6 +++ b/SingularityUI/app/components/newDeployForm/fields.es6 @@ -86,6 +86,7 @@ export const FIELDS = { {id: 'portIndex', type: 'number'}, {id: 'portNumber', type: 'number'}, {id: 'protocol', type: 'text', default: 'HTTP'}, + {id: 'method', type: 'text', default: 'GET'}, {id: 'startupDelaySeconds', type: 'number'}, {id: 'startupTimeoutSeconds', type: 'number'}, {id: 'startupIntervalSeconds', type: 'number'}, From ba3cecd76ced9c189640fab20246cb0882dbc158 Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Thu, 14 Mar 2019 16:42:15 -0400 Subject: [PATCH 17/37] Make Singularity report byte counts to monitor against jute buffer size --- .../singularity/data/RequestManager.java | 4 ++++ .../hubspot/singularity/data/TaskManager.java | 16 ++++++++++++++ .../resources/MetricsResource.java | 22 ++++++++++++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/RequestManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/RequestManager.java index b2ea97e9dd..b55560652f 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/RequestManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/RequestManager.java @@ -201,6 +201,10 @@ public List getAllRequestIds() { return getChildren(NORMAL_PATH_ROOT); } + public long getAllRequestIdsBytes() { + return getAllRequestIds().stream().mapToLong(x -> x.getBytes().length).sum(); + } + public List getRequestIdsWithHistory() { return getChildren(HISTORY_PATH_ROOT); } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java index ab55c48f41..ca8111443d 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java @@ -1136,4 +1136,20 @@ public void purgeStaleRequests(List activeRequestIds, long deleteBeforeT public SingularityDeleteResult deleteRequestId(String requestId) { return delete(getRequestPath(requestId)); } + + public long getTaskStatusBytes() { + return countBytes(getChildren(LAST_ACTIVE_TASK_STATUSES_PATH_ROOT)); + } + + public long getActiveTaskIdBytes() { + return countBytes(getChildren(ACTIVE_PATH_ROOT)); + } + + public long getTaskHistoryIdBytes() { + return countBytes(getChildren(HISTORY_PATH_ROOT)); + } + + private long countBytes(List list) { + return list.stream().mapToLong(x -> x.getBytes().length).sum(); + } } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/resources/MetricsResource.java b/SingularityService/src/main/java/com/hubspot/singularity/resources/MetricsResource.java index fe4a92ac5f..b8bfcdafd7 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/resources/MetricsResource.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/resources/MetricsResource.java @@ -10,8 +10,11 @@ import com.codahale.metrics.Metric; import com.codahale.metrics.MetricRegistry; +import com.google.common.collect.ImmutableMap; import com.google.inject.Inject; import com.hubspot.singularity.config.ApiPaths; +import com.hubspot.singularity.data.RequestManager; +import com.hubspot.singularity.data.TaskManager; import com.hubspot.singularity.metrics.SingularityMetricsContainer; import io.swagger.v3.oas.annotations.Operation; @@ -25,10 +28,16 @@ @Tags({@Tag(name = "Metrics")}) public class MetricsResource { private final MetricRegistry registry; + private final RequestManager requestManager; + private final TaskManager taskManager; @Inject - public MetricsResource(MetricRegistry registry) { + public MetricsResource(MetricRegistry registry, + RequestManager requestManager, + TaskManager taskManager) { this.registry = registry; + this.requestManager = requestManager; + this.taskManager = taskManager; } @GET @@ -39,4 +48,15 @@ public SingularityMetricsContainer getRegistry() { metrics.entrySet().removeIf((e) -> e.getKey().contains("ManagedPooledDataSource")); return new SingularityMetricsContainer(metrics); } + + @GET + @Path("/zk-bytes") + public Map getZkBytesMetrics() { + return ImmutableMap.of( + "allRequestIds", requestManager.getAllRequestIdsBytes(), + "taskStatuses", taskManager.getTaskStatusBytes(), + "activeTaskIds", taskManager.getActiveTaskIdBytes(), + "taskHistoryIds", taskManager.getTaskHistoryIdBytes() + ); + } } From d9c4dbf862357ae0c1593233f83c930a7114b5cc Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 08:38:59 -0400 Subject: [PATCH 18/37] migration to clean old zk nodes, namespace active tasks, namespace pending tasks --- .../singularity/data/CuratorAsyncManager.java | 95 +++++++++++++++++-- .../hubspot/singularity/data/TaskManager.java | 86 ++++++++++------- .../zkmigrations/CleanOldNodesMigration.java | 32 +++++++ .../NamespaceActiveTasksMigration.java | 47 +++++++++ .../NamespacePendingTasksMigration.java | 44 +++++++++ .../SingularityZkMigrationsModule.java | 3 + .../SingularityMesosStatusUpdateHandler.java | 10 +- .../singularity/resources/TaskResource.java | 7 +- .../resources/TaskTrackerResource.java | 2 +- .../scheduler/SingularityCleaner.java | 16 ++-- .../SingularityHealthcheckAsyncHandler.java | 2 +- .../scheduler/SingularityLeaderCache.java | 8 +- .../scheduler/SingularityNewTaskChecker.java | 2 +- .../scheduler/SingularityScheduler.java | 6 +- ...ularityTaskShellCommandDispatchPoller.java | 2 +- .../mesos/SingularityStartupTest.java | 2 +- .../scheduler/SingularitySchedulerTest.java | 4 +- 17 files changed, 291 insertions(+), 77 deletions(-) create mode 100644 SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/CleanOldNodesMigration.java create mode 100644 SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespaceActiveTasksMigration.java create mode 100644 SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespacePendingTasksMigration.java diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/CuratorAsyncManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/CuratorAsyncManager.java index 153f4a6434..b51ab39fe3 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/CuratorAsyncManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/CuratorAsyncManager.java @@ -21,7 +21,6 @@ import com.codahale.metrics.MetricRegistry; import com.google.common.base.Optional; -import com.google.common.base.Throwables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.hubspot.singularity.SingularityId; @@ -155,7 +154,7 @@ protected List getChildrenAsIdsForParents(final Str try { return getChildrenAsIdsForParentsThrows(pathNameforLogs, parents, idTranscoder); } catch (Throwable t) { - throw Throwables.propagate(t); + throw new RuntimeException(t); } } @@ -193,7 +192,7 @@ protected List exists(final String pathNameForLogs, try { return existsThrows(pathNameForLogs, paths, idTranscoder); } catch (Throwable t) { - throw Throwables.propagate(t); + throw new RuntimeException(t); } } @@ -227,7 +226,7 @@ protected List notExists(final String pathNameForLo try { return notExistsThrows(pathNameForLogs, pathsMap); } catch (Throwable t) { - throw Throwables.propagate(t); + throw new RuntimeException(t); } } @@ -235,7 +234,7 @@ protected List getAsync(final String pathNameForLogs, final Collection(getAsyncThrows(pathNameForLogs, paths, transcoder, Optional.of(cache)).values()); } catch (Throwable t) { - throw Throwables.propagate(t); + throw new RuntimeException(t); } } @@ -243,7 +242,7 @@ protected List getAsync(final String pathNameForLogs, final Collection(getAsyncThrows(pathNameForLogs, paths, transcoder, Optional.> absent()).values()); } catch (Throwable t) { - throw Throwables.propagate(t); + throw new RuntimeException(t); } } @@ -251,7 +250,7 @@ protected Map getAsyncWithPath(final String pathNameForLogs, fina try { return getAsyncThrows(pathNameForLogs, paths, transcoder, Optional.> absent()); } catch (Throwable t) { - throw Throwables.propagate(t); + throw new RuntimeException(t); } } @@ -259,7 +258,49 @@ protected List getAsyncChildren(final String parent, final Transcoder try { return getAsyncChildrenThrows(parent, transcoder); } catch (Throwable t) { - throw Throwables.propagate(t); + throw new RuntimeException(t); + } + } + + protected List getAsyncNestedChildrenAsListThrows(final String pathNameForLogs, final List parentPaths, final Transcoder transcoder) throws Exception { + final List allPaths = new ArrayList<>(); + for (String parent : parentPaths) { + for (String child : getChildren(parent)) { + allPaths.add(ZKPaths.makePath(parent, child)); + } + } + + final List results = new ArrayList<>(); + final CountDownLatch latch = new CountDownLatch(allPaths.size()); + final AtomicInteger bytes = new AtomicInteger(); + final BackgroundCallback callback = new BackgroundCallback() { + + @Override + public void processResult(CuratorFramework client, CuratorEvent event) throws Exception { + try { + if (event.getData() == null || event.getData().length == 0) { + LOG.trace("Expected active node {} but it wasn't there", event.getPath()); + return; + } + bytes.getAndAdd(event.getData().length); + + final T object = transcoder.fromBytes(event.getData()); + + results.add(object); + } finally { + latch.countDown(); + } + } + }; + + return queryAndReturnResultsThrows(results, allPaths, callback, latch, pathNameForLogs, bytes, CuratorQueryMethod.GET_DATA); + } + + protected List getAsyncNestedChildrenAsList(final String pathNameForLogs, final List parentPaths, final Transcoder transcoder) { + try { + return getAsyncNestedChildrenAsListThrows(pathNameForLogs, parentPaths, transcoder); + } catch (Throwable t) { + throw new RuntimeException(t); } } @@ -304,7 +345,43 @@ protected Map> getAsyncNestedChildDataAsMap(final String pathN try { return getAsyncNestedChildDataAsMapThrows(pathNameForLogs, parentPathsMap, subpath, transcoder); } catch (Throwable t) { - throw Throwables.propagate(t); + throw new RuntimeException(t); + } + } + + protected List getAsyncNestedChildIdsAsListThrows(final String pathNameForLogs, final String parentPath, final IdTranscoder transcoder) throws Exception { + final List allPaths = new ArrayList<>(); + for (String child : getChildren(parentPath)) { + allPaths.add(ZKPaths.makePath(parentPath, child)); + } + + final List results = new ArrayList<>(); + final CountDownLatch latch = new CountDownLatch(allPaths.size()); + final AtomicInteger bytes = new AtomicInteger(); + final BackgroundCallback callback = new BackgroundCallback() { + + @Override + public void processResult(CuratorFramework client, CuratorEvent event) throws Exception { + try { + event.getChildren().forEach((child) -> { + final T object = transcoder.fromString(child); + bytes.getAndAdd(child.getBytes().length); + results.add(object); + }); + } finally { + latch.countDown(); + } + } + }; + + return queryAndReturnResultsThrows(results, allPaths, callback, latch, pathNameForLogs, bytes, CuratorQueryMethod.GET_CHILDREN); + } + + protected List getAsyncNestedChildIdsAsList(final String pathNameForLogs, final String parentPath, final IdTranscoder transcoder) { + try { + return getAsyncNestedChildIdsAsListThrows(pathNameForLogs, parentPath, transcoder); + } catch (Throwable t) { + throw new RuntimeException(t); } } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java index ab55c48f41..d7ccdbad1b 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java @@ -69,7 +69,6 @@ public class TaskManager extends CuratorAsyncManager { private static final String TASKS_ROOT = "/tasks"; - private static final String ACTIVE_PATH_ROOT = TASKS_ROOT + "/active"; private static final String LAST_ACTIVE_TASK_STATUSES_PATH_ROOT = TASKS_ROOT + "/statuses"; private static final String PENDING_PATH_ROOT = TASKS_ROOT + "/scheduled"; private static final String CLEANUP_PATH_ROOT = TASKS_ROOT + "/cleanup"; @@ -158,7 +157,7 @@ public TaskManager(CuratorFramework curator, SingularityConfiguration configurat // since we can't use creatingParentsIfNeeded in transactions public void createRequiredParents() { create(HISTORY_PATH_ROOT); - create(ACTIVE_PATH_ROOT); + create(LAST_ACTIVE_TASK_STATUSES_PATH_ROOT); } private String getLastHealthcheckPath(SingularityTaskId taskId) { @@ -182,7 +181,11 @@ private String getHealthchecksFinishedPath(SingularityTaskId taskId) { } private String getLastActiveTaskStatusPath(SingularityTaskId taskId) { - return ZKPaths.makePath(LAST_ACTIVE_TASK_STATUSES_PATH_ROOT, taskId.getId()); + return ZKPaths.makePath(LAST_ACTIVE_TASK_STATUSES_PATH_ROOT, taskId.getRequestId(), taskId.getId()); + } + + private String getLastActiveTaskParent(String requestId) { + return ZKPaths.makePath(LAST_ACTIVE_TASK_STATUSES_PATH_ROOT, requestId); } private String getHealthcheckPath(SingularityTaskHealthcheckResult healthcheck) { @@ -253,12 +256,12 @@ private String getHistoryPath(SingularityTaskId taskId) { return ZKPaths.makePath(getRequestPath(taskId.getRequestId()), taskId.getId()); } - private String getActivePath(String taskId) { - return ZKPaths.makePath(ACTIVE_PATH_ROOT, taskId); + private String getPendingPath(SingularityPendingTaskId pendingTaskId) { + return ZKPaths.makePath(PENDING_PATH_ROOT, pendingTaskId.getRequestId(), pendingTaskId.getId()); } - private String getPendingPath(SingularityPendingTaskId pendingTaskId) { - return ZKPaths.makePath(PENDING_PATH_ROOT, pendingTaskId.getId()); + private String getPendingForRequestPath(String requestId) { + return ZKPaths.makePath(PENDING_PATH_ROOT, requestId); } private String getPendingTasksToDeletePath(SingularityPendingTaskId pendingTaskId) { return ZKPaths.makePath(PENDING_TASKS_TO_DELETE_PATH_ROOT, pendingTaskId.getId()); } @@ -287,14 +290,22 @@ public int getNumActiveTasks() { if (leaderCache.active()) { return leaderCache.getNumActiveTasks(); } - return getNumChildren(ACTIVE_PATH_ROOT); + int total = 0; + for (String requestId : getChildren(LAST_ACTIVE_TASK_STATUSES_PATH_ROOT)) { + total += getNumChildren(getLastActiveTaskParent(requestId)); + } + return total; } public int getNumScheduledTasks() { if (leaderCache.active()) { return leaderCache.getNumPendingTasks(); } - return getNumChildren(PENDING_PATH_ROOT); + int total = 0; + for (String requestId : getChildren(PENDING_PATH_ROOT)) { + total += getNumChildren(getPendingForRequestPath(requestId)); + } + return total; } public void saveLoadBalancerState(SingularityTaskId taskId, LoadBalancerRequestType requestType, SingularityLoadBalancerUpdate lbUpdate) { @@ -370,7 +381,14 @@ public List getActiveTaskIdsAsStrings() { if (leaderCache.active()) { return leaderCache.getActiveTaskIdsAsStrings(); } - return getChildren(ACTIVE_PATH_ROOT); + + List results = new ArrayList<>(); + + for (String requestId : getChildren(LAST_ACTIVE_TASK_STATUSES_PATH_ROOT)) { + results.addAll(getChildren(getLastActiveTaskParent(requestId))); + } + + return results; } public List getActiveTaskIds() { @@ -386,7 +404,7 @@ public List getActiveTaskIds(boolean useWebCache) { return webCache.getActiveTaskIds(); } - return getTaskIds(ACTIVE_PATH_ROOT); + return getAsyncNestedChildIdsAsList(LAST_ACTIVE_TASK_STATUSES_PATH_ROOT, LAST_ACTIVE_TASK_STATUSES_PATH_ROOT, taskIdTranscoder); } public List getCleanupTaskIds() { @@ -609,12 +627,12 @@ public SingularityDeleteResult deleteTaskHistoryUpdate(SingularityTaskId taskId, return delete(getUpdatePath(taskId, state)); } - public boolean isActiveTask(String taskId) { + public boolean isActiveTask(SingularityTaskId taskId) { if (leaderCache.active()) { return leaderCache.isActiveTask(taskId); } - return exists(getActivePath(taskId)); + return exists(getLastActiveTaskStatusPath(taskId)); } public SingularityCreateResult markUnhealthyKill(SingularityTaskId taskId) { @@ -669,7 +687,7 @@ public List filterActiveTaskIds(List taskI final List paths = Lists.newArrayListWithCapacity(taskIds.size()); for (SingularityTaskId taskId : taskIds) { - paths.add(getActivePath(taskId.getId())); + paths.add(getLastActiveTaskStatusPath(taskId)); } return exists("filterActiveTaskIds", paths, taskIdTranscoder); @@ -695,7 +713,7 @@ public List filterInactiveTaskIds(List tas final Map pathsMap = Maps.newHashMap(); for (SingularityTaskId taskId : taskIds) { - pathsMap.put(getActivePath(taskId.getId()), taskId); + pathsMap.put(getLastActiveTaskStatusPath(taskId), taskId); } return notExists("filterInactiveTaskIds", pathsMap); @@ -830,14 +848,14 @@ public boolean taskExistsInZk(SingularityTaskId taskId) { public void activateLeaderCache() { leaderCache.cachePendingTasks(fetchPendingTasks()); leaderCache.cachePendingTasksToDelete(getPendingTasksMarkedForDeletion()); - leaderCache.cacheActiveTaskIds(getTaskIds(ACTIVE_PATH_ROOT)); + leaderCache.cacheActiveTaskIds(getActiveTaskIds(false)); leaderCache.cacheCleanupTasks(fetchCleanupTasks()); leaderCache.cacheKilledTasks(fetchKilledTaskIdRecords()); leaderCache.cacheTaskHistoryUpdates(getAllTaskHistoryUpdates()); } private List fetchPendingTasks() { - return getAsyncChildren(PENDING_PATH_ROOT, pendingTaskTranscoder); + return getAsyncNestedChildrenAsList(PENDING_PATH_ROOT, getChildren(PENDING_PATH_ROOT), pendingTaskTranscoder); } public List getPendingTaskIds() { @@ -853,22 +871,26 @@ public List getPendingTaskIds(boolean useWebCache) { return webCache.getPendingTaskIds(); } - return getChildrenAsIds(PENDING_PATH_ROOT, pendingTaskIdTranscoder); + return getAsyncNestedChildIdsAsList(PENDING_PATH_ROOT, PENDING_PATH_ROOT, pendingTaskIdTranscoder); } public List getPendingTaskIdsForRequest(final String requestId) { - List pendingTaskIds = getPendingTaskIds(); - return pendingTaskIds.stream() - .filter(pendingTaskId -> pendingTaskId.getRequestId().equals(requestId)) - .collect(Collectors.collectingAndThen(Collectors.toList(), ImmutableList::copyOf)); + return getChildrenAsIds(getPendingForRequestPath(requestId), pendingTaskIdTranscoder); } - public List getPendingTasksForRequest(final String requestId) { - return getAsync( - PENDING_PATH_ROOT, - getPendingTaskIdsForRequest(requestId).stream().map(this::getPendingPath).collect(Collectors.toList()), - pendingTaskTranscoder - ); + public List getPendingTasksForRequest(final String requestId, boolean useWebCache) { + if (leaderCache.active()) { + return leaderCache.getPendingTasks().stream() + .filter((p) -> p.getPendingTaskId().getRequestId().equals(requestId)) + .collect(Collectors.toList()); + } + + if (useWebCache && webCache.useCachedPendingTasks()) { + return webCache.getPendingTasks().stream() + .filter((p) -> p.getPendingTaskId().getRequestId().equals(requestId)) + .collect(Collectors.toList()); + } + return getAsyncChildren(getPendingForRequestPath(requestId), pendingTaskTranscoder); } public List getPendingTasks() { @@ -935,7 +957,7 @@ private void createTaskAndDeletePendingTaskPrivate(SingularityTask task) throws CuratorTransactionFinal transaction = curator.inTransaction().create().forPath(path, taskTranscoder.toBytes(task)).and(); - transaction.create().forPath(getActivePath(task.getTaskId().getId())).and().commit(); + transaction.create().forPath(getLastActiveTaskStatusPath(task.getTaskId())).and().commit(); leaderCache.putActiveTask(task); taskCache.set(path, task); @@ -991,6 +1013,7 @@ public SingularityDeleteResult deleteKilledRecord(SingularityTaskId taskId) { @Timed public SingularityDeleteResult deleteLastActiveTaskStatus(SingularityTaskId taskId) { + leaderCache.deleteActiveTaskId(taskId); return delete(getLastActiveTaskStatusPath(taskId)); } @@ -1083,11 +1106,6 @@ public SingularityCreateResult createTaskCleanup(SingularityTaskCleanup cleanup) return result; } - public void deleteActiveTask(String taskId) { - leaderCache.deleteActiveTaskId(taskId); - delete(getActivePath(taskId)); - } - public void deletePendingTask(SingularityPendingTaskId pendingTaskId) { leaderCache.deletePendingTask(pendingTaskId); delete(getPendingPath(pendingTaskId)); diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/CleanOldNodesMigration.java b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/CleanOldNodesMigration.java new file mode 100644 index 0000000000..a730725871 --- /dev/null +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/CleanOldNodesMigration.java @@ -0,0 +1,32 @@ +package com.hubspot.singularity.data.zkmigrations; + +import java.util.List; + +import org.apache.curator.framework.CuratorFramework; + +import com.google.common.collect.ImmutableList; +import com.google.inject.Inject; + +public class CleanOldNodesMigration extends ZkDataMigration { + private final CuratorFramework curatorFramework; + + @Inject + public CleanOldNodesMigration(CuratorFramework curatorFramework) { + super(12); + this.curatorFramework = curatorFramework; + } + + @Override + public void applyMigration() { + List toClean = ImmutableList.of("/disasters/previous-stats", "/disasters/stats", "/disasters/task-credits", "/offer-state"); + try { + for (String node : toClean) { + if (curatorFramework.checkExists().forPath(node) != null) { + curatorFramework.delete().deletingChildrenIfNeeded().forPath(node); + } + } + } catch (Throwable t) { + throw new RuntimeException(t); + } + } +} diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespaceActiveTasksMigration.java b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespaceActiveTasksMigration.java new file mode 100644 index 0000000000..b2c661ee88 --- /dev/null +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespaceActiveTasksMigration.java @@ -0,0 +1,47 @@ +package com.hubspot.singularity.data.zkmigrations; + +import java.util.List; + +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.utils.ZKPaths; + +import com.google.inject.Inject; +import com.hubspot.singularity.SingularityTaskId; + +public class NamespaceActiveTasksMigration extends ZkDataMigration { + private static final String ACTIVE_TASKS_ROOT = "/tasks/active"; + private static final String ACTIVE_STATUSES_ROOT = "/tasks/statuses"; + + private final CuratorFramework curatorFramework; + + @Inject + public NamespaceActiveTasksMigration(CuratorFramework curatorFramework) { + super(14); + this.curatorFramework = curatorFramework; + } + + @Override + public void applyMigration() { + try { + if (curatorFramework.checkExists().forPath(ACTIVE_TASKS_ROOT) != null) { + curatorFramework.delete().deletingChildrenIfNeeded().forPath(ACTIVE_TASKS_ROOT); + + List currentActive = curatorFramework.getChildren().forPath(ACTIVE_STATUSES_ROOT); + for (String taskIdString : currentActive) { + SingularityTaskId taskId = SingularityTaskId.valueOf(taskIdString); + String oldPath = ZKPaths.makePath(ACTIVE_STATUSES_ROOT, taskIdString); + byte[] oldData = curatorFramework.getData().forPath(oldPath); + String newPath = ZKPaths.makePath(ACTIVE_STATUSES_ROOT, taskId.getRequestId(), taskIdString); + if (curatorFramework.checkExists().forPath(newPath) != null) { + curatorFramework.setData().forPath(newPath, oldData); + } else { + curatorFramework.create().creatingParentsIfNeeded().forPath(newPath, oldData); + } + curatorFramework.delete().forPath(oldPath); + } + } + } catch (Throwable t) { + throw new RuntimeException(t); + } + } +} diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespacePendingTasksMigration.java b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespacePendingTasksMigration.java new file mode 100644 index 0000000000..6573fc3dc0 --- /dev/null +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespacePendingTasksMigration.java @@ -0,0 +1,44 @@ +package com.hubspot.singularity.data.zkmigrations; + +import java.util.List; + +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.utils.ZKPaths; + +import com.google.inject.Inject; +import com.hubspot.singularity.SingularityPendingTaskId; + +public class NamespacePendingTasksMigration extends ZkDataMigration { + private static final String PENDING_TASK_ROOT = "/tasks/scheduled"; + + private final CuratorFramework curatorFramework; + + @Inject + public NamespacePendingTasksMigration(CuratorFramework curatorFramework) { + super(13); + this.curatorFramework = curatorFramework; + } + + @Override + public void applyMigration() { + try { + if (curatorFramework.checkExists().forPath(PENDING_TASK_ROOT) != null) { + List currentPendingTasks = curatorFramework.getChildren().forPath(PENDING_TASK_ROOT); + for (String taskIdString : currentPendingTasks) { + SingularityPendingTaskId pendingTaskId = SingularityPendingTaskId.valueOf(taskIdString); + String oldPath = ZKPaths.makePath(PENDING_TASK_ROOT, taskIdString); + byte[] oldData = curatorFramework.getData().forPath(oldPath); + String newPath = ZKPaths.makePath(PENDING_TASK_ROOT, pendingTaskId.getRequestId(), taskIdString); + if (curatorFramework.checkExists().forPath(newPath) != null) { + curatorFramework.setData().forPath(newPath, oldData); + } else { + curatorFramework.create().creatingParentsIfNeeded().forPath(newPath, oldData); + } + curatorFramework.delete().forPath(oldPath); + } + } + } catch (Throwable t) { + throw new RuntimeException(t); + } + } +} diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/SingularityZkMigrationsModule.java b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/SingularityZkMigrationsModule.java index 1644de5cdc..d80aee9519 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/SingularityZkMigrationsModule.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/SingularityZkMigrationsModule.java @@ -28,6 +28,9 @@ public void configure(Binder binder) { dataMigrations.addBinding().to(SingularityRequestTypeMigration.class); dataMigrations.addBinding().to(PendingRequestDataMigration.class); dataMigrations.addBinding().to(SingularityPendingRequestWithRunIdMigration.class); + dataMigrations.addBinding().to(CleanOldNodesMigration.class); + dataMigrations.addBinding().to(NamespacePendingTasksMigration.class); + dataMigrations.addBinding().to(NamespaceActiveTasksMigration.class); } @Provides diff --git a/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityMesosStatusUpdateHandler.java b/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityMesosStatusUpdateHandler.java index 51d05a3453..5affc1c625 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityMesosStatusUpdateHandler.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityMesosStatusUpdateHandler.java @@ -258,9 +258,7 @@ private void unsafeProcessStatusUpdate(Protos.TaskStatus status, SingularityTask } } - final boolean isActiveTask = taskManager.isActiveTask(taskId); - - if (isActiveTask && !taskState.isDone()) { + if (!taskState.isDone()) { if (task.isPresent()) { final Optional pendingDeploy = deployManager.getPendingDeploy(taskIdObj.getRequestId()); @@ -298,19 +296,19 @@ private void unsafeProcessStatusUpdate(Protos.TaskStatus status, SingularityTask taskManager.deleteKilledRecord(taskIdObj); - handleCompletedTaskState(status, taskIdObj, taskState, taskHistoryUpdateCreateResult, task, timestamp, isActiveTask); + handleCompletedTaskState(status, taskIdObj, taskState, taskHistoryUpdateCreateResult, task, timestamp); } saveNewTaskStatusHolder(taskIdObj, newTaskStatusHolder, taskState); } private synchronized void handleCompletedTaskState(TaskStatus status, SingularityTaskId taskIdObj, ExtendedTaskState taskState, - SingularityCreateResult taskHistoryUpdateCreateResult, Optional task, long timestamp, boolean isActiveTask) { + SingularityCreateResult taskHistoryUpdateCreateResult, Optional task, long timestamp) { // Method synchronized to prevent race condition where two tasks complete at the same time but the leader cache holding the state // doesn't get updated between each task completion. If this were to happen, then slaves would never transition from DECOMMISSIONING to // DECOMMISSIONED because each task state check thinks the other task is still running. slaveAndRackManager.checkStateAfterFinishedTask(taskIdObj, status.getAgentId().getValue(), leaderCache); - scheduler.handleCompletedTask(task, taskIdObj, isActiveTask, timestamp, taskState, taskHistoryUpdateCreateResult, status); + scheduler.handleCompletedTask(task, taskIdObj, timestamp, taskState, taskHistoryUpdateCreateResult, status); } public CompletableFuture processStatusUpdateAsync(Protos.TaskStatus status) { diff --git a/SingularityService/src/main/java/com/hubspot/singularity/resources/TaskResource.java b/SingularityService/src/main/java/com/hubspot/singularity/resources/TaskResource.java index ea5655b620..1c8fb9da91 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/resources/TaskResource.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/resources/TaskResource.java @@ -38,7 +38,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Optional; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.inject.Inject; import com.hubspot.jackson.jaxrs.PropertyFiltering; @@ -256,7 +255,7 @@ public List getScheduledTasksForRequest( @Parameter(description = "Use the cached version of this data to limit expensive api calls") @QueryParam("useWebCache") Boolean useWebCache) { authorizationHelper.checkForAuthorizationByRequestId(requestId, user, SingularityAuthorizationScope.READ); - final List tasks = Lists.newArrayList(Iterables.filter(taskManager.getPendingTasks(useWebCache(useWebCache)), SingularityPendingTask.matchingRequest(requestId))); + final List tasks = Lists.newArrayList(taskManager.getPendingTasksForRequest(requestId, true)); return taskRequestManager.getTaskRequests(tasks); } @@ -345,7 +344,7 @@ private SingularityTask checkActiveTask(String taskId, SingularityAuthorizationS Optional task = taskManager.getTask(taskIdObj); - checkNotFound(task.isPresent() && taskManager.isActiveTask(taskId), "No active task with id %s", taskId); + checkNotFound(task.isPresent() && taskManager.isActiveTask(taskIdObj), "No active task with id %s", taskId); if (task.isPresent()) { authorizationHelper.checkForAuthorizationByRequestId(task.get().getTaskId().getRequestId(), user, scope); @@ -593,7 +592,7 @@ public SingularityTaskShellCommandRequest runShellCommand( authorizationHelper.checkForAuthorizationByTaskId(taskId, user, SingularityAuthorizationScope.WRITE); validator.checkActionEnabled(SingularityAction.RUN_SHELL_COMMAND); - if (!taskManager.isActiveTask(taskId)) { + if (!taskManager.isActiveTask(taskIdObj)) { throw WebExceptions.badRequest("%s is not an active task, can't run %s on it", taskId, shellCommand.getName()); } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/resources/TaskTrackerResource.java b/SingularityService/src/main/java/com/hubspot/singularity/resources/TaskTrackerResource.java index 52fdec630f..0308f4ca5f 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/resources/TaskTrackerResource.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/resources/TaskTrackerResource.java @@ -92,7 +92,7 @@ public Optional getTaskStateByRunId( } } // Check if it's pending - for (SingularityPendingTask pendingTask : taskManager.getPendingTasksForRequest(requestId)) { + for (SingularityPendingTask pendingTask : taskManager.getPendingTasksForRequest(requestId, false)) { if (pendingTask.getRunId().isPresent() && pendingTask.getRunId().get().equals(runId)) { return Optional.of(new SingularityTaskState( Optional.absent(), diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCleaner.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCleaner.java index cf0002c6f9..2c3b803b40 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCleaner.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCleaner.java @@ -34,7 +34,7 @@ import com.hubspot.singularity.SingularityLoadBalancerUpdate; import com.hubspot.singularity.SingularityPendingRequest; import com.hubspot.singularity.SingularityPendingRequest.PendingType; -import com.hubspot.singularity.SingularityPendingTask; +import com.hubspot.singularity.SingularityPendingTaskId; import com.hubspot.singularity.SingularityRequest; import com.hubspot.singularity.SingularityRequestCleanup; import com.hubspot.singularity.SingularityRequestDeployState; @@ -340,7 +340,7 @@ private void drainRequestCleanupQueue() { private void processRequestCleanup(long start, AtomicInteger numTasksKilled, AtomicInteger numScheduledTasksRemoved, SingularityRequestCleanup requestCleanup) { final List activeTaskIds = taskManager.getActiveTaskIdsForRequest(requestCleanup.getRequestId()); - final List pendingTasks = taskManager.getPendingTasksForRequest(requestCleanup.getRequestId()); + final List pendingTaskIds = taskManager.getPendingTaskIdsForRequest(requestCleanup.getRequestId()); final String requestId = requestCleanup.getRequestId(); final Optional requestWithState = requestManager.getRequest(requestId); @@ -403,9 +403,9 @@ private void processRequestCleanup(long start, AtomicInteger numTasksKilled, Ato } if (killScheduledTasks) { - for (SingularityPendingTask matchingTask : Iterables.filter(pendingTasks, SingularityPendingTask.matchingRequest(requestId))) { - LOG.debug("Deleting scheduled task {} due to {}", matchingTask, requestCleanup); - taskManager.deletePendingTask(matchingTask.getPendingTaskId()); + for (SingularityPendingTaskId matchingTaskId : pendingTaskIds) { + LOG.debug("Deleting scheduled task {} due to {}", matchingTaskId, requestCleanup); + taskManager.deletePendingTask(matchingTaskId); numScheduledTasksRemoved.getAndIncrement(); } } @@ -538,7 +538,7 @@ public int drainCleanupQueue() { } private boolean isValidTask(SingularityTaskCleanup cleanupTask) { - return taskManager.isActiveTask(cleanupTask.getTaskId().getId()); + return taskManager.isActiveTask(cleanupTask.getTaskId()); } private void checkKilledTaskIdRecords() { @@ -560,7 +560,7 @@ private void checkKilledTaskIdRecords() { .forEach((killedTaskIdRecordsForRequest) -> { lock.runWithRequestLock(() -> { for (SingularityKilledTaskIdRecord killedTaskIdRecord : killedTaskIdRecordsForRequest.getValue()) { - if (!taskManager.isActiveTask(killedTaskIdRecord.getTaskId().getId())) { + if (!taskManager.isActiveTask(killedTaskIdRecord.getTaskId())) { SingularityDeleteResult deleteResult = taskManager.deleteKilledRecord(killedTaskIdRecord.getTaskId()); LOG.debug("Deleting obsolete {} - {}", killedTaskIdRecord, deleteResult); @@ -903,7 +903,7 @@ private boolean canRunRequestLbCleanup(SingularityRequestLbCleanup cleanup, Lis return false; } for (String taskId : cleanup.getActiveTaskIds()) { - if (taskManager.isActiveTask(taskId)) { + if (taskManager.isActiveTask(SingularityTaskId.valueOf(taskId))) { LOG.trace("Request still has active tasks, will wait for lb request cleanup"); return false; } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java index e310935e6f..2aca996cb5 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java @@ -76,7 +76,7 @@ public void saveResult(Optional statusCode, Optional responseBo taskManager.saveHealthcheckResult(result); if (result.isFailed()) { - if (!taskManager.isActiveTask(task.getTaskId().getId())) { + if (!taskManager.isActiveTask(task.getTaskId())) { LOG.trace("Task {} is not active, not re-enqueueing healthcheck", task.getTaskId()); return; } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityLeaderCache.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityLeaderCache.java index 885ebdab5c..001ae370b7 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityLeaderCache.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityLeaderCache.java @@ -174,13 +174,13 @@ public void savePendingTask(SingularityPendingTask pendingTask) { pendingTaskIdToPendingTask.put(pendingTask.getPendingTaskId(), pendingTask); } - public void deleteActiveTaskId(String taskId) { + public void deleteActiveTaskId(SingularityTaskId taskId) { if (!active) { LOG.warn("deleteActiveTask {}, but not active", taskId); return; } - activeTaskIds.remove(SingularityTaskId.valueOf(taskId)); + activeTaskIds.remove(taskId); } public List exists(List taskIds) { @@ -234,8 +234,8 @@ public int getNumPendingTasks() { return pendingTaskIdToPendingTask.size(); } - public boolean isActiveTask(String taskId) { - return activeTaskIds.contains(SingularityTaskId.valueOf(taskId)); + public boolean isActiveTask(SingularityTaskId taskId) { + return activeTaskIds.contains(taskId); } public void putActiveTask(SingularityTask task) { diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityNewTaskChecker.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityNewTaskChecker.java index 8babdab260..dc5b40228a 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityNewTaskChecker.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityNewTaskChecker.java @@ -321,7 +321,7 @@ private void checkForRepeatedFailures(Optional requ @VisibleForTesting CheckTaskState getTaskState(SingularityTask task, Optional requestWithState, SingularityHealthchecker healthchecker) { - if (!taskManager.isActiveTask(task.getTaskId().getId())) { + if (!taskManager.isActiveTask(task.getTaskId())) { return CheckTaskState.OBSOLETE; } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityScheduler.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityScheduler.java index de5f5838fb..f6c0394727 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityScheduler.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityScheduler.java @@ -643,14 +643,10 @@ private SingularityDeployStatistics getDeployStatistics(String requestId, String } @Timed - public void handleCompletedTask(Optional task, SingularityTaskId taskId, boolean wasActive, long timestamp, ExtendedTaskState state, + public void handleCompletedTask(Optional task, SingularityTaskId taskId, long timestamp, ExtendedTaskState state, SingularityCreateResult taskHistoryUpdateCreateResult, Protos.TaskStatus status) { final SingularityDeployStatistics deployStatistics = getDeployStatistics(taskId.getRequestId(), taskId.getDeployId()); - if (wasActive) { - taskManager.deleteActiveTask(taskId.getId()); - } - if (!task.isPresent() || task.get().getTaskRequest().getRequest().isLoadBalanced()) { taskManager.createLBCleanupTask(taskId); } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityTaskShellCommandDispatchPoller.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityTaskShellCommandDispatchPoller.java index 21a1b42a1f..3571e8ccdf 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityTaskShellCommandDispatchPoller.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityTaskShellCommandDispatchPoller.java @@ -58,7 +58,7 @@ public void runActionOnPoll() { for (SingularityTaskShellCommandRequest shellRequest : shellRequests) { Optional task = taskManager.getTask(shellRequest.getTaskId()); - if (!task.isPresent() || !taskManager.isActiveTask(shellRequest.getTaskId().getId())) { + if (!task.isPresent() || !taskManager.isActiveTask(shellRequest.getTaskId())) { LOG.info("Skipping shell request {} because {} didn't exist or isn't active", shellRequest, shellRequest.getTaskId()); continue; } diff --git a/SingularityService/src/test/java/com/hubspot/singularity/mesos/SingularityStartupTest.java b/SingularityService/src/test/java/com/hubspot/singularity/mesos/SingularityStartupTest.java index 5fa6ae25dc..32bca74709 100644 --- a/SingularityService/src/test/java/com/hubspot/singularity/mesos/SingularityStartupTest.java +++ b/SingularityService/src/test/java/com/hubspot/singularity/mesos/SingularityStartupTest.java @@ -44,7 +44,7 @@ public void testFailuresInLaunchPath() { Assert.assertTrue(taskManager.getActiveTaskIds().size() == 1); - taskManager.deleteActiveTask(task.getTaskId().getId()); + taskManager.deleteLastActiveTaskStatus(task.getTaskId()); resourceOffers(); diff --git a/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularitySchedulerTest.java b/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularitySchedulerTest.java index 254826604f..09f34c2b83 100644 --- a/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularitySchedulerTest.java +++ b/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularitySchedulerTest.java @@ -1535,12 +1535,12 @@ public void testTaskOddities() { taskManager.deleteTaskHistory(taskOne.getTaskId()); - Assert.assertTrue(taskManager.isActiveTask(taskOne.getTaskId().getId())); + Assert.assertTrue(taskManager.isActiveTask(taskOne.getTaskId())); statusUpdate(taskOne, TaskState.TASK_RUNNING); statusUpdate(taskOne, TaskState.TASK_FAILED); - Assert.assertTrue(!taskManager.isActiveTask(taskOne.getTaskId().getId())); + Assert.assertTrue(!taskManager.isActiveTask(taskOne.getTaskId())); Assert.assertEquals(2, taskManager.getTaskHistoryUpdates(taskOne.getTaskId()).size()); } From d6d4b8efc0a4053417a42d427f123824b4627c92 Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 09:18:30 -0400 Subject: [PATCH 19/37] test fixing, test migration --- .../singularity/data/CuratorAsyncManager.java | 1 + .../hubspot/singularity/data/TaskManager.java | 31 +++++--- .../data/zkmigrations/ZkMigrationTest.java | 74 +++++-------------- 3 files changed, 40 insertions(+), 66 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/CuratorAsyncManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/CuratorAsyncManager.java index b51ab39fe3..9f3edbf8e7 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/CuratorAsyncManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/CuratorAsyncManager.java @@ -354,6 +354,7 @@ protected List getAsyncNestedChildIdsAsListThrows(f for (String child : getChildren(parentPath)) { allPaths.add(ZKPaths.makePath(parentPath, child)); } + LOG.info("All paths {}", allPaths); final List results = new ArrayList<>(); final CountDownLatch latch = new CountDownLatch(allPaths.size()); diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java index d7ccdbad1b..39f6988e9e 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java @@ -13,6 +13,7 @@ import org.apache.curator.framework.api.transaction.CuratorTransactionFinal; import org.apache.curator.utils.ZKPaths; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NodeExistsException; import org.apache.zookeeper.data.Stat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -475,10 +476,6 @@ public String apply(SingularityTaskId taskId) { return activeTasks; } - public List getLastActiveTaskStatuses() { - return getAsyncChildren(LAST_ACTIVE_TASK_STATUSES_PATH_ROOT, taskStatusTranscoder); - } - @Timed public Optional getLastActiveTaskStatus(SingularityTaskId taskId) { return getData(getLastActiveTaskStatusPath(taskId), taskStatusTranscoder); @@ -537,10 +534,6 @@ public Map> getAllTaskHist return getTaskHistoryUpdates(getAllTaskIds()); } - public int getNumHealthchecks(SingularityTaskId taskId) { - return getNumChildren(getHealthcheckParentPath(taskId)); - } - public int getNumNonstartupHealthchecks(SingularityTaskId taskId) { int numChecks = 0; List checks = getChildren(getHealthcheckParentPath(taskId)); @@ -949,15 +942,29 @@ private void createTaskAndDeletePendingTaskPrivate(SingularityTask task) throws msg = String.format("%s (%s)", msg, task.getTaskRequest().getPendingTask().getMessage().get()); } - saveTaskHistoryUpdate(new SingularityTaskHistoryUpdate(task.getTaskId(), now, ExtendedTaskState.TASK_LAUNCHED, Optional.of(msg), Optional.absent())); - saveLastActiveTaskStatus(new SingularityTaskStatusHolder(task.getTaskId(), Optional.absent(), now, serverId, Optional.of(task.getAgentId().getValue()))); + saveTaskHistoryUpdate(new SingularityTaskHistoryUpdate(task.getTaskId(), now, ExtendedTaskState.TASK_LAUNCHED, Optional.of(msg), Optional.absent())); + + SingularityTaskStatusHolder taskStatusHolder = new SingularityTaskStatusHolder(task.getTaskId(), Optional.absent(), now, serverId, Optional.of(task.getAgentId().getValue())); + + String taskStatusParent = getLastActiveTaskParent(task.getTaskId().getRequestId()); + if (!exists(taskStatusParent)) { + try { + curator.create().forPath(taskStatusParent); + } catch (NodeExistsException nee) { + LOG.debug("Node {} already existed", taskStatusParent); + } + } try { final String path = getTaskPath(task.getTaskId()); - CuratorTransactionFinal transaction = curator.inTransaction().create().forPath(path, taskTranscoder.toBytes(task)).and(); + CuratorTransactionFinal transaction = curator.inTransaction().create() + .forPath(path, taskTranscoder.toBytes(task)) + .and(); - transaction.create().forPath(getLastActiveTaskStatusPath(task.getTaskId())).and().commit(); + transaction.create() + .forPath(getLastActiveTaskStatusPath(task.getTaskId()), taskStatusTranscoder.toBytes(taskStatusHolder)) + .and().commit(); leaderCache.putActiveTask(task); taskCache.set(path, task); diff --git a/SingularityService/src/test/java/com/hubspot/singularity/data/zkmigrations/ZkMigrationTest.java b/SingularityService/src/test/java/com/hubspot/singularity/data/zkmigrations/ZkMigrationTest.java index cc97e660d7..cab8b8ab0e 100644 --- a/SingularityService/src/test/java/com/hubspot/singularity/data/zkmigrations/ZkMigrationTest.java +++ b/SingularityService/src/test/java/com/hubspot/singularity/data/zkmigrations/ZkMigrationTest.java @@ -4,7 +4,6 @@ import java.util.List; import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.utils.ZKPaths; import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Test; @@ -15,16 +14,17 @@ import com.google.inject.Inject; import com.hubspot.singularity.RequestState; import com.hubspot.singularity.RequestType; -import com.hubspot.singularity.SingularityDeployKey; import com.hubspot.singularity.SingularityPendingRequest; import com.hubspot.singularity.SingularityPendingRequest.PendingType; +import com.hubspot.singularity.SingularityPendingTask; +import com.hubspot.singularity.SingularityPendingTaskBuilder; import com.hubspot.singularity.SingularityPendingTaskId; +import com.hubspot.singularity.SingularityTaskId; +import com.hubspot.singularity.SingularityTaskStatusHolder; import com.hubspot.singularity.SingularityTestBaseNoDb; import com.hubspot.singularity.data.MetadataManager; import com.hubspot.singularity.data.RequestManager; import com.hubspot.singularity.data.TaskManager; -import com.hubspot.singularity.data.transcoders.StringTranscoder; -import com.hubspot.singularity.data.zkmigrations.SingularityCmdLineArgsMigration.SingularityPendingRequestPrevious; public class ZkMigrationTest extends SingularityTestBaseNoDb { @@ -60,61 +60,27 @@ public void testMigrationRunner() { Assert.assertTrue(migrationRunner.checkMigrations() == 0); } - private String getPendingPath(String requestId, String deployId) { - return ZKPaths.makePath(SingularityCmdLineArgsMigration.REQUEST_PENDING_PATH, new SingularityDeployKey(requestId, deployId).getId()); - } - - private String getPendingPath(SingularityPendingTaskId pendingTaskId) { - return ZKPaths.makePath(SingularityCmdLineArgsMigration.TASK_PENDING_PATH, pendingTaskId.getId()); - } - @Test - public void testCmdLineArgsMigration() throws Exception { - metadataManager.setZkDataVersion("2"); - - // save some old stuff - SingularityPendingRequestPrevious p1 = new SingularityPendingRequestPrevious("r1", "d1", 23L, Optional. absent(), PendingType.BOUNCE, Optional. absent()); - SingularityPendingRequestPrevious p2 = new SingularityPendingRequestPrevious("r2", "d3", 123L, Optional.of("user1"), PendingType.BOUNCE, Optional.of("cmd line args")); - - byte[] p1b = objectMapper.writeValueAsBytes(p1); - byte[] p2b = objectMapper.writeValueAsBytes(p2); - - curator.create().creatingParentsIfNeeded().forPath(getPendingPath("r1", "d1"), p1b); - curator.create().creatingParentsIfNeeded().forPath(getPendingPath("r2", "de"), p2b); - - SingularityPendingTaskId pt1 = new SingularityPendingTaskId("r1", "d1", 23L, 3, PendingType.BOUNCE, 1L); - SingularityPendingTaskId pt2 = new SingularityPendingTaskId("r2", "d3", 231L, 1, PendingType.UNPAUSED, 23L); + public void testNamespaceTasksMigration() throws Exception { + metadataManager.setZkDataVersion("11"); + long now = System.currentTimeMillis(); + SingularityPendingTaskId testPending = new SingularityPendingTaskId("test", "deploy", now, 1, PendingType.IMMEDIATE, now); + SingularityPendingTask pendingTask = new SingularityPendingTaskBuilder().setPendingTaskId(testPending).build(); + curator.create().creatingParentsIfNeeded().forPath("/tasks/scheduled/" + testPending.getId(), objectMapper.writeValueAsBytes(pendingTask)); - curator.create().creatingParentsIfNeeded().forPath(getPendingPath(pt1)); - curator.create().creatingParentsIfNeeded().forPath(getPendingPath(pt2), StringTranscoder.INSTANCE.toBytes("cmd line args")); + SingularityTaskId taskId = new SingularityTaskId("test", "deploy", now, 1, "host", "rack"); + curator.create().creatingParentsIfNeeded().forPath("/tasks/active/" + taskId.getId()); + SingularityTaskStatusHolder statusHolder = new SingularityTaskStatusHolder(taskId, Optional.absent(), now, "1234", Optional.absent()); + curator.create().creatingParentsIfNeeded().forPath("/tasks/statuses/" + taskId.getId(), objectMapper.writeValueAsBytes(statusHolder)); migrationRunner.checkMigrations(); - Assert.assertTrue(!taskManager.getPendingTask(pt1).get().getCmdLineArgsList().isPresent()); - Assert.assertTrue(taskManager.getPendingTask(pt2).get().getCmdLineArgsList().get().get(0).equals("cmd line args")); - Assert.assertTrue(taskManager.getPendingTask(pt2).get().getCmdLineArgsList().get().size() == 1); - - Assert.assertTrue(taskManager.getPendingTaskIds().contains(pt1)); - Assert.assertTrue(taskManager.getPendingTaskIds().contains(pt2)); - - Assert.assertTrue(requestManager.getPendingRequests().size() == 2); - - for (SingularityPendingRequest r : requestManager.getPendingRequests()) { - if (r.getRequestId().equals("r1")) { - Assert.assertEquals(r.getDeployId(), p1.getDeployId()); - Assert.assertEquals(r.getTimestamp(), p1.getTimestamp()); - Assert.assertEquals(r.getPendingType(), p1.getPendingType()); - Assert.assertTrue(!r.getCmdLineArgsList().isPresent()); - Assert.assertEquals(r.getUser(), p1.getUser()); - } else { - Assert.assertEquals(r.getDeployId(), p2.getDeployId()); - Assert.assertEquals(r.getTimestamp(), p2.getTimestamp()); - Assert.assertEquals(r.getPendingType(), p2.getPendingType()); - Assert.assertTrue(r.getCmdLineArgsList().get().size() == 1); - Assert.assertTrue(r.getCmdLineArgsList().get().get(0).equals("cmd line args")); - Assert.assertEquals(r.getUser(), p2.getUser()); - } - } + List pendingTaskIds = taskManager.getPendingTaskIds(); + Assert.assertTrue(pendingTaskIds.contains(testPending)); + Assert.assertEquals(pendingTask, taskManager.getPendingTask(testPending).get()); + + List active = taskManager.getActiveTaskIds(); + Assert.assertTrue(active.contains(taskId)); } @Test From f6716874d90a88543a6ccfa158e75080e675fd1d Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 09:20:55 -0400 Subject: [PATCH 20/37] do this at the end --- .../data/zkmigrations/NamespaceActiveTasksMigration.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespaceActiveTasksMigration.java b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespaceActiveTasksMigration.java index b2c661ee88..3f317b6838 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespaceActiveTasksMigration.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespaceActiveTasksMigration.java @@ -24,8 +24,6 @@ public NamespaceActiveTasksMigration(CuratorFramework curatorFramework) { public void applyMigration() { try { if (curatorFramework.checkExists().forPath(ACTIVE_TASKS_ROOT) != null) { - curatorFramework.delete().deletingChildrenIfNeeded().forPath(ACTIVE_TASKS_ROOT); - List currentActive = curatorFramework.getChildren().forPath(ACTIVE_STATUSES_ROOT); for (String taskIdString : currentActive) { SingularityTaskId taskId = SingularityTaskId.valueOf(taskIdString); @@ -39,6 +37,8 @@ public void applyMigration() { } curatorFramework.delete().forPath(oldPath); } + + curatorFramework.delete().deletingChildrenIfNeeded().forPath(ACTIVE_TASKS_ROOT); } } catch (Throwable t) { throw new RuntimeException(t); From 545224d04b80920918da215aa3d0b2eeacb4b223 Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 09:53:41 -0400 Subject: [PATCH 21/37] bump dep versions --- pom.xml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index a6f7473e5b..e4c5966c49 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ com.hubspot basepom - 25.1 + 25.3 Singularity @@ -35,7 +35,7 @@ oss-release 1.3.1 3.7 - 2.12.0 + 4.2.0 4.0.2 3.0.2 1.15.0 @@ -57,13 +57,14 @@ 1.2.3 0.1.0 3.1.3 + 3.10.6.Final 4.1.22.Final 3.5.1 0.9.11 1.3.8 1.7.25 2.0.0 - 3.4.8 + 3.5.4-beta 1.3.5.1 1.3.7 0.1.1 From d5a1dac07061ef3b18898126dacfbf5d0c674c4c Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 10:24:55 -0400 Subject: [PATCH 22/37] fix ning async update --- .../config/SingularityExecutorModule.java | 4 ++-- .../SingularityExecutorArtifactFetcher.java | 5 ++--- .../singularity/SingularityAuthModule.java | 4 ++-- .../singularity/data/CuratorAsyncManager.java | 1 - .../singularity/data/SandboxManager.java | 18 ++++++------------ .../hooks/LoadBalancerClientImpl.java | 4 ++-- .../hooks/SingularityWebhookSender.java | 7 +++---- .../resources/AbstractLeaderAwareResource.java | 4 ++-- .../singularity/resources/TaskResource.java | 8 ++------ .../scheduler/SingularityHealthchecker.java | 10 +++------- .../scheduler/SingularityHealthchecksTest.java | 3 ++- pom.xml | 1 + 12 files changed, 27 insertions(+), 42 deletions(-) diff --git a/SingularityExecutor/src/main/java/com/hubspot/singularity/executor/config/SingularityExecutorModule.java b/SingularityExecutor/src/main/java/com/hubspot/singularity/executor/config/SingularityExecutorModule.java index 31327b4e2e..7949343a87 100644 --- a/SingularityExecutor/src/main/java/com/hubspot/singularity/executor/config/SingularityExecutorModule.java +++ b/SingularityExecutor/src/main/java/com/hubspot/singularity/executor/config/SingularityExecutorModule.java @@ -45,8 +45,8 @@ protected void configure() { @Named(LOCAL_DOWNLOAD_HTTP_CLIENT) public AsyncHttpClient providesHttpClient(SingularityExecutorConfiguration configuration) { AsyncHttpClientConfig.Builder configBldr = new AsyncHttpClientConfig.Builder(); - configBldr.setRequestTimeoutInMs((int) configuration.getLocalDownloadServiceTimeoutMillis()); - configBldr.setIdleConnectionTimeoutInMs((int) configuration.getLocalDownloadServiceTimeoutMillis()); + configBldr.setRequestTimeout((int) configuration.getLocalDownloadServiceTimeoutMillis()); + configBldr.setPooledConnectionIdleTimeout((int) configuration.getLocalDownloadServiceTimeoutMillis()); configBldr.addRequestFilter(new ThrottleRequestFilter(configuration.getLocalDownloadServiceMaxConnections())); return new AsyncHttpClient(configBldr.build()); diff --git a/SingularityExecutor/src/main/java/com/hubspot/singularity/executor/task/SingularityExecutorArtifactFetcher.java b/SingularityExecutor/src/main/java/com/hubspot/singularity/executor/task/SingularityExecutorArtifactFetcher.java index b6c8fc0db2..08044ad072 100644 --- a/SingularityExecutor/src/main/java/com/hubspot/singularity/executor/task/SingularityExecutorArtifactFetcher.java +++ b/SingularityExecutor/src/main/java/com/hubspot/singularity/executor/task/SingularityExecutorArtifactFetcher.java @@ -1,6 +1,5 @@ package com.hubspot.singularity.executor.task; -import java.io.IOException; import java.nio.file.Path; import java.util.List; import java.util.Objects; @@ -172,8 +171,8 @@ private void downloadFilesFromLocalDownloadService(List s3 ListenableFuture future = localDownloadHttpClient.executeRequest(postRequestBldr.build()); futures.add(new FutureHolder(future, System.currentTimeMillis(), s3Artifact)); - } catch (IOException ioe) { - throw Throwables.propagate(ioe); + } catch (Throwable t) { + throw new RuntimeException(t); } } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/SingularityAuthModule.java b/SingularityService/src/main/java/com/hubspot/singularity/SingularityAuthModule.java index 7bb1ebae7e..19131dd8b8 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/SingularityAuthModule.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/SingularityAuthModule.java @@ -29,8 +29,8 @@ public void configure(Binder binder) { if (clazz == SingularityAuthenticatorClass.WEBHOOK) { AuthConfiguration authConfiguration = getConfiguration().getAuthConfiguration(); AsyncHttpClientConfig clientConfig = new AsyncHttpClientConfig.Builder() - .setConnectionTimeoutInMs(authConfiguration.getWebhookAuthConnectTimeoutMs()) - .setRequestTimeoutInMs(authConfiguration.getWebhookAuthRequestTimeoutMs()) + .setConnectTimeout(authConfiguration.getWebhookAuthConnectTimeoutMs()) + .setRequestTimeout(authConfiguration.getWebhookAuthRequestTimeoutMs()) .setMaxRequestRetry(authConfiguration.getWebhookAuthRetries()) .build(); SingularityAsyncHttpClient webhookAsyncHttpClient = new SingularityAsyncHttpClient(clientConfig); diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/CuratorAsyncManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/CuratorAsyncManager.java index 9f3edbf8e7..b51ab39fe3 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/CuratorAsyncManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/CuratorAsyncManager.java @@ -354,7 +354,6 @@ protected List getAsyncNestedChildIdsAsListThrows(f for (String child : getChildren(parentPath)) { allPaths.add(ZKPaths.makePath(parentPath, child)); } - LOG.info("All paths {}", allPaths); final List results = new ArrayList<>(); final CountDownLatch latch = new CountDownLatch(allPaths.size()); diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/SandboxManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/SandboxManager.java index c8a6f04ffb..270cb6f88e 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/SandboxManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/SandboxManager.java @@ -22,7 +22,6 @@ import com.hubspot.mesos.json.MesosFileObject; import com.hubspot.singularity.config.SingularityConfiguration; import com.ning.http.client.AsyncHttpClient; -import com.ning.http.client.PerRequestConfig; import com.ning.http.client.Response; @Singleton @@ -52,13 +51,10 @@ public SlaveNotFoundException(Exception e) { public Collection browse(String slaveHostname, String fullPath) throws SlaveNotFoundException { try { - PerRequestConfig timeoutConfig = new PerRequestConfig(); - timeoutConfig.setRequestTimeoutInMs((int) configuration.getSandboxHttpTimeoutMillis()); - Response response = asyncHttpClient .prepareGet(String.format("http://%s:5051/files/browse", slaveHostname)) - .setPerRequestConfig(timeoutConfig) - .addQueryParameter("path", fullPath) + .setRequestTimeout((int) configuration.getSandboxHttpTimeoutMillis()) + .addQueryParam("path", fullPath) .execute() .get(); @@ -88,18 +84,16 @@ public Collection browse(String slaveHostname, String fullPath) public Optional read(String slaveHostname, String fullPath, Optional offset, Optional length) throws SlaveNotFoundException { try { final AsyncHttpClient.BoundRequestBuilder builder = asyncHttpClient.prepareGet(String.format("http://%s:5051/files/read", slaveHostname)) - .addQueryParameter("path", fullPath); + .addQueryParam("path", fullPath); - PerRequestConfig timeoutConfig = new PerRequestConfig(); - timeoutConfig.setRequestTimeoutInMs((int) configuration.getSandboxHttpTimeoutMillis()); - builder.setPerRequestConfig(timeoutConfig); + builder.setRequestTimeout((int) configuration.getSandboxHttpTimeoutMillis()); if (offset.isPresent()) { - builder.addQueryParameter("offset", offset.get().toString()); + builder.addQueryParam("offset", offset.get().toString()); } if (length.isPresent()) { - builder.addQueryParameter("length", length.get().toString()); + builder.addQueryParam("length", length.get().toString()); } final Response response = builder.execute().get(); diff --git a/SingularityService/src/main/java/com/hubspot/singularity/hooks/LoadBalancerClientImpl.java b/SingularityService/src/main/java/com/hubspot/singularity/hooks/LoadBalancerClientImpl.java index c0fcfdb5cc..cade723314 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/hooks/LoadBalancerClientImpl.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/hooks/LoadBalancerClientImpl.java @@ -8,10 +8,10 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Optional; import com.google.common.base.Throwables; @@ -98,7 +98,7 @@ private String getLoadBalancerUri(LoadBalancerRequestId loadBalancerRequestId) { private void addAllQueryParams(BoundRequestBuilder boundRequestBuilder, Map queryParams) { for (Map.Entry entry : queryParams.entrySet()) { - boundRequestBuilder.addQueryParameter(entry.getKey(), entry.getValue()); + boundRequestBuilder.addQueryParam(entry.getKey(), entry.getValue()); } } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/hooks/SingularityWebhookSender.java b/SingularityService/src/main/java/com/hubspot/singularity/hooks/SingularityWebhookSender.java index df022adf71..a2e81d228b 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/hooks/SingularityWebhookSender.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/hooks/SingularityWebhookSender.java @@ -1,6 +1,5 @@ package com.hubspot.singularity.hooks; -import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.CompletableFuture; @@ -209,13 +208,13 @@ private CompletableFuture executeWebhookAsync(String uri, Object p try { handler.setCompletableFuture(webhookFuture); postRequest.execute(handler); - } catch (IOException e) { - LOG.warn("Couldn't execute webhook to {}", uri, e); + } catch (Throwable t) { + LOG.warn("Couldn't execute webhook to {}", uri, t); if (handler.shouldDeleteUpdateDueToQueueAboveCapacity()) { handler.deleteWebhookUpdate(); } - webhookFuture.completeExceptionally(e); + webhookFuture.completeExceptionally(t); } return webhookFuture; } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/resources/AbstractLeaderAwareResource.java b/SingularityService/src/main/java/com/hubspot/singularity/resources/AbstractLeaderAwareResource.java index d5b9d909f6..e951371f87 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/resources/AbstractLeaderAwareResource.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/resources/AbstractLeaderAwareResource.java @@ -89,7 +89,7 @@ protected T maybeProxyToLeader(HttpServletRequest request, Class clazz try { LOG.trace("Sending request to leader: {}", httpRequest); response = httpClient.executeRequest(httpRequest).get(); - } catch (IOException|ExecutionException|InterruptedException e) { + } catch (ExecutionException|InterruptedException e) { LOG.error("Could not proxy request {} to leader", e); throw new WebApplicationException(e, 500); } @@ -120,7 +120,7 @@ private void copyHeadersAndParams(BoundRequestBuilder requestBuilder, HttpServle if (parameterNames != null) { while (parameterNames.hasMoreElements()) { String parameterName = parameterNames.nextElement(); - requestBuilder.addQueryParameter(parameterName, request.getParameter(parameterName)); + requestBuilder.addQueryParam(parameterName, request.getParameter(parameterName)); LOG.trace("Copied query param {}={}", parameterName, request.getParameter(parameterName)); } } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/resources/TaskResource.java b/SingularityService/src/main/java/com/hubspot/singularity/resources/TaskResource.java index 1c8fb9da91..a8d1d24542 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/resources/TaskResource.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/resources/TaskResource.java @@ -92,7 +92,6 @@ import com.ning.http.client.HttpResponseBodyPart; import com.ning.http.client.HttpResponseHeaders; import com.ning.http.client.HttpResponseStatus; -import com.ning.http.client.PerRequestConfig; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import io.dropwizard.auth.Auth; @@ -663,14 +662,11 @@ private Response getFile(String slaveHostname, String fileFullPath, boolean down httpPrefix, slaveHostname, httpPort); try { - PerRequestConfig unlimitedTimeout = new PerRequestConfig(); - unlimitedTimeout.setRequestTimeoutInMs(-1); - NingOutputToJaxRsStreamingOutputWrapper streamingOutputNingHandler = new NingOutputToJaxRsStreamingOutputWrapper( httpClient .prepareGet(url) - .addQueryParameter("path", fileFullPath) - .setPerRequestConfig(unlimitedTimeout) + .addQueryParam("path", fileFullPath) + .setRequestTimeout(-1) ); // Strip file path down to just a file name if we can diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java index e977bfa06b..58fe9f9c19 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java @@ -38,7 +38,6 @@ import com.hubspot.singularity.helpers.MesosUtils; import com.hubspot.singularity.sentry.SingularityExceptionNotifier; import com.ning.http.client.AsyncHttpClient; -import com.ning.http.client.PerRequestConfig; import com.ning.http.client.RequestBuilder; @Singleton @@ -291,21 +290,18 @@ private void asyncHealthcheck(final SingularityTask task) { task.getTaskRequest().getDeploy().getHealthcheck().get().getResponseTimeoutSeconds().or(configuration.getHealthcheckTimeoutSeconds()) : configuration.getHealthcheckTimeoutSeconds(); try { - PerRequestConfig prc = new PerRequestConfig(); - prc.setRequestTimeoutInMs((int) TimeUnit.SECONDS.toMillis(timeoutSeconds)); - RequestBuilder builder = new RequestBuilder("GET"); builder.setFollowRedirects(true); builder.setUrl(uri.get()); - builder.setPerRequestConfig(prc); + builder.setRequestTimeout((int) TimeUnit.SECONDS.toMillis(timeoutSeconds)); LOG.trace("Issuing a healthcheck ({}) for task {} with timeout {}s", uri.get(), task.getTaskId(), timeoutSeconds); http.prepareRequest(builder.build()).execute(handler); } catch (Throwable t) { - LOG.debug("Exception while preparing healthcheck ({}) for task ({})", uri, task.getTaskId(), t); + LOG.debug("Exception while preparing healthcheck ({}) for task ({})", uri.get(), task.getTaskId(), t); exceptionNotifier.notify(String.format("Error preparing healthcheck (%s)", t.getMessage()), t, ImmutableMap.of("taskId", task.getTaskId().toString())); - saveFailure(handler, String.format("Healthcheck failed due to exception: %s", t.getMessage())); + saveFailure(handler, String.format("Healthcheck (%s) failed due to exception: %s", uri.get(), t.getMessage())); } } diff --git a/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularityHealthchecksTest.java b/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularityHealthchecksTest.java index 6f8562ed2a..e42d8576f4 100644 --- a/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularityHealthchecksTest.java +++ b/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularityHealthchecksTest.java @@ -433,8 +433,9 @@ public void testPortIndices() { newTaskChecker.enqueueNewTaskCheck(firstTask, requestManager.getRequest(requestId), healthchecker); - Awaitility.await("healthcheck present").atMost(5, TimeUnit.SECONDS).until(() -> taskManager.getLastHealthcheck(firstTask.getTaskId()).isPresent()); + Awaitility.await("healthcheck present").atMost(6, TimeUnit.SECONDS).until(() -> taskManager.getLastHealthcheck(firstTask.getTaskId()).isPresent()); + Optional result = taskManager.getLastHealthcheck(firstTask.getTaskId()); Assert.assertTrue(taskManager.getLastHealthcheck(firstTask.getTaskId()).get().toString().contains("host1:81")); } finally { unsetConfigurationForNoDelay(); diff --git a/pom.xml b/pom.xml index e4c5966c49..6088c0d7f9 100644 --- a/pom.xml +++ b/pom.xml @@ -71,6 +71,7 @@ 1.5 1.6.1 1.6.1 + 1.9.38 1.8 ${project.artifactId}-${project.version} mysql From 255de95b947339ddaf4c7f4d3f74bc3ec8af5f8f Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 10:35:34 -0400 Subject: [PATCH 23/37] more test fixing form ning update --- .../scheduler/SingularityHealthcheckAsyncHandler.java | 9 +++++++-- .../singularity/scheduler/SingularityHealthchecker.java | 3 ++- .../scheduler/SingularityHealthchecksTest.java | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java index 2aca996cb5..e63240b56d 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java @@ -28,6 +28,7 @@ public class SingularityHealthcheckAsyncHandler extends AsyncCompletionHandler failureStatusCodes; + private String healthcheckUri = ""; // For logging purposes only public SingularityHealthcheckAsyncHandler(SingularityExceptionNotifier exceptionNotifier, SingularityConfiguration configuration, SingularityHealthchecker healthchecker, SingularityNewTaskChecker newTaskChecker, TaskManager taskManager, SingularityTask task) { @@ -44,6 +45,10 @@ public SingularityHealthcheckAsyncHandler(SingularityExceptionNotifier exception startTime = System.currentTimeMillis(); } + public void setHealthcehckUri(String healthcheckUri) { + this.healthcheckUri = healthcheckUri; + } + @Override public Response onCompleted(Response response) throws Exception { Optional responseBody = Optional.absent(); @@ -59,9 +64,9 @@ public Response onCompleted(Response response) throws Exception { @Override public void onThrowable(Throwable t) { - LOG.trace("Exception while making health check for task {}", task.getTaskId(), t); + LOG.trace("Exception while making health check for task {} ({})", task.getTaskId(), healthcheckUri, t); - saveResult(Optional. absent(), Optional. absent(), Optional.of(String.format("Healthcheck failed due to exception: %s", t.getMessage())), Optional.of(t)); + saveResult(Optional. absent(), Optional. absent(), Optional.of(String.format("Healthcheck (%s) failed due to exception: %s", healthcheckUri, t.getMessage())), Optional.of(t)); } public void saveResult(Optional statusCode, Optional responseBody, Optional errorMessage, Optional throwable) { diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java index 58fe9f9c19..f04514490a 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java @@ -278,13 +278,14 @@ private boolean shouldHealthcheck(final SingularityTask task, final Optional uri = getHealthcheckUri(task); + final SingularityHealthcheckAsyncHandler handler = new SingularityHealthcheckAsyncHandler(exceptionNotifier, configuration, this, newTaskChecker, taskManager, task); if (!uri.isPresent()) { saveFailure(handler, "Invalid healthcheck uri or ports not present"); return; } + handler.setHealthcehckUri(uri.get()); final Integer timeoutSeconds = task.getTaskRequest().getDeploy().getHealthcheck().isPresent() ? task.getTaskRequest().getDeploy().getHealthcheck().get().getResponseTimeoutSeconds().or(configuration.getHealthcheckTimeoutSeconds()) : configuration.getHealthcheckTimeoutSeconds(); diff --git a/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularityHealthchecksTest.java b/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularityHealthchecksTest.java index e42d8576f4..8764015e3b 100644 --- a/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularityHealthchecksTest.java +++ b/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularityHealthchecksTest.java @@ -416,7 +416,7 @@ public void testPortIndices() { try { setConfigurationForNoDelay(); initRequest(); - HealthcheckOptions options = new HealthcheckOptionsBuilder("http://uri").setPortIndex(Optional.of(1)).setStartupDelaySeconds(Optional.of(0)).build(); + HealthcheckOptions options = new HealthcheckOptionsBuilder("/uri").setPortIndex(Optional.of(1)).setStartupDelaySeconds(Optional.of(0)).build(); firstDeploy = initAndFinishDeploy(request, new SingularityDeployBuilder(request.getId(), firstDeployId).setCommand(Optional.of("sleep 100")) .setHealthcheck(Optional.of(options)), Optional.of(new Resources(1, 64, 3, 0))); From 0bb47e70599403b61cd1b580a12676b62268e8fe Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 10:53:57 -0400 Subject: [PATCH 24/37] auto-clean request groups --- .../singularity/data/RequestGroupManager.java | 24 ++++++++++--- .../AbstractLeaderAwareResource.java | 5 +++ .../resources/RequestGroupResource.java | 36 +++++++++++++------ .../scheduler/SingularityCleaner.java | 7 +++- 4 files changed, 56 insertions(+), 16 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/RequestGroupManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/RequestGroupManager.java index 25b5d96124..5b4431fcac 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/RequestGroupManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/RequestGroupManager.java @@ -1,5 +1,6 @@ package com.hubspot.singularity.data; +import java.util.ArrayList; import java.util.List; import org.apache.curator.framework.CuratorFramework; @@ -34,10 +35,6 @@ private String getRequestGroupPath(String requestGroupId) { return ZKPaths.makePath(REQUEST_GROUP_ROOT, requestGroupId); } - public List getRequestGroupIds() { - return getChildren(REQUEST_GROUP_ROOT); - } - public List getRequestGroups(boolean useWebCache) { if (useWebCache && webCache.useCachedRequestGroups()) { return webCache.getRequestGroups(); @@ -60,4 +57,23 @@ public SingularityCreateResult saveRequestGroup(SingularityRequestGroup requestG public SingularityDeleteResult deleteRequestGroup(String requestGroupId) { return delete(getRequestGroupPath(requestGroupId)); } + + public void removeFromAllGroups(String requestId) { + getRequestGroups(false).stream() + .filter((g) -> g.getRequestIds().contains(requestId)) + .forEach((g) -> { + List ids = new ArrayList<>(); + ids.addAll(g.getRequestIds()); + ids.remove(requestId); + if (ids.isEmpty()) { + deleteRequestGroup(g.getId()); + } else { + saveRequestGroup(new SingularityRequestGroup( + g.getId(), + ids, + g.getMetadata() + )); + } + }); + } } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/resources/AbstractLeaderAwareResource.java b/SingularityService/src/main/java/com/hubspot/singularity/resources/AbstractLeaderAwareResource.java index e951371f87..2658b05392 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/resources/AbstractLeaderAwareResource.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/resources/AbstractLeaderAwareResource.java @@ -94,6 +94,11 @@ protected T maybeProxyToLeader(HttpServletRequest request, Class clazz throw new WebApplicationException(e, 500); } + // void responses + if (clazz.isAssignableFrom(Response.class)) { + return (T) response; + } + try { if (response.getStatusCode() > 399) { throw new WebApplicationException(response.getResponseBody(Charsets.UTF_8.toString()), response.getStatusCode()); diff --git a/SingularityService/src/main/java/com/hubspot/singularity/resources/RequestGroupResource.java b/SingularityService/src/main/java/com/hubspot/singularity/resources/RequestGroupResource.java index 38cd9a278c..d3fe113ea1 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/resources/RequestGroupResource.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/resources/RequestGroupResource.java @@ -2,6 +2,7 @@ import java.util.List; +import javax.servlet.http.HttpServletRequest; import javax.ws.rs.DELETE; import javax.ws.rs.GET; import javax.ws.rs.POST; @@ -9,14 +10,20 @@ import javax.ws.rs.PathParam; import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import org.apache.curator.framework.recipes.leader.LeaderLatch; + +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Optional; import com.google.inject.Inject; import com.hubspot.singularity.SingularityRequestGroup; import com.hubspot.singularity.config.ApiPaths; import com.hubspot.singularity.data.RequestGroupManager; import com.hubspot.singularity.data.SingularityValidator; +import com.ning.http.client.AsyncHttpClient; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Parameter; @@ -30,12 +37,14 @@ @Produces({MediaType.APPLICATION_JSON}) @Schema(title = "Manages Singularity Request Groups, which are collections of one or more Singularity Requests") @Tags({@Tag(name = "Request Groups")}) -public class RequestGroupResource { +public class RequestGroupResource extends AbstractLeaderAwareResource { private final RequestGroupManager requestGroupManager; private final SingularityValidator validator; @Inject - public RequestGroupResource(RequestGroupManager requestGroupManager, SingularityValidator validator) { + public RequestGroupResource(AsyncHttpClient httpClient, LeaderLatch leaderLatch, ObjectMapper objectMapper, + RequestGroupManager requestGroupManager, SingularityValidator validator) { + super(httpClient, leaderLatch, objectMapper); this.requestGroupManager = requestGroupManager; this.validator = validator; } @@ -63,19 +72,24 @@ public Optional getRequestGroup( @DELETE @Path("/group/{requestGroupId}") @Operation(summary = "Delete a specific Singularity request group by ID") - public void deleteRequestGroup( - @Parameter(required = true, description = "The id of the request group") @PathParam("requestGroupId") String requestGroupId) { - requestGroupManager.deleteRequestGroup(requestGroupId); + public Response deleteRequestGroup( + @Parameter(required = true, description = "The id of the request group") @PathParam("requestGroupId") String requestGroupId, + @Context HttpServletRequest requestContext) { + return maybeProxyToLeader(requestContext, Response.class, null, () -> { + requestGroupManager.deleteRequestGroup(requestGroupId); + return Response.ok().build(); + }); } @POST @Operation(summary = "Create a Singularity request group") public SingularityRequestGroup saveRequestGroup( - @RequestBody(required = true, description = "The new request group to create") SingularityRequestGroup requestGroup) { - validator.checkRequestGroup(requestGroup); - - requestGroupManager.saveRequestGroup(requestGroup); - - return requestGroup; + @RequestBody(required = true, description = "The new request group to create") SingularityRequestGroup requestGroup, + @Context HttpServletRequest requestContext) { + return maybeProxyToLeader(requestContext, SingularityRequestGroup.class, requestGroup, () -> { + validator.checkRequestGroup(requestGroup); + requestGroupManager.saveRequestGroup(requestGroup); + return requestGroup; + }); } } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCleaner.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCleaner.java index 2c3b803b40..7c3d94f9dd 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCleaner.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCleaner.java @@ -50,6 +50,7 @@ import com.hubspot.singularity.TaskCleanupType; import com.hubspot.singularity.config.SingularityConfiguration; import com.hubspot.singularity.data.DeployManager; +import com.hubspot.singularity.data.RequestGroupManager; import com.hubspot.singularity.data.RequestManager; import com.hubspot.singularity.data.TaskManager; import com.hubspot.singularity.data.UsageManager; @@ -76,6 +77,7 @@ public class SingularityCleaner { private final SingularityMesosScheduler scheduler; private final SingularitySchedulerLock lock; private final UsageManager usageManager; + private final RequestGroupManager requestGroupManager; private final SingularityConfiguration configuration; private final long killNonLongRunningTasksInCleanupAfterMillis; @@ -83,7 +85,8 @@ public class SingularityCleaner { @Inject public SingularityCleaner(TaskManager taskManager, SingularityDeployHealthHelper deployHealthHelper, DeployManager deployManager, RequestManager requestManager, SingularityConfiguration configuration, LoadBalancerClient lbClient, SingularityExceptionNotifier exceptionNotifier, - RequestHistoryHelper requestHistoryHelper, SingularityMesosScheduler scheduler, SingularitySchedulerLock lock, UsageManager usageManager) { + RequestHistoryHelper requestHistoryHelper, SingularityMesosScheduler scheduler, SingularitySchedulerLock lock, UsageManager usageManager, + RequestGroupManager requestGroupManager) { this.taskManager = taskManager; this.lbClient = lbClient; this.deployHealthHelper = deployHealthHelper; @@ -94,6 +97,7 @@ public SingularityCleaner(TaskManager taskManager, SingularityDeployHealthHelper this.scheduler = scheduler; this.lock = lock; this.usageManager = usageManager; + this.requestGroupManager = requestGroupManager; this.configuration = configuration; @@ -523,6 +527,7 @@ private void cleanupRequestData(SingularityRequestCleanup requestCleanup) { deployManager.deleteRequestId(requestCleanup.getRequestId()); LOG.trace("Deleted stale request data for {}", requestCleanup.getRequestId()); usageManager.deleteRequestUtilization(requestCleanup.getRequestId()); + requestGroupManager.removeFromAllGroups(requestCleanup.getRequestId()); } public int drainCleanupQueue() { From 9465b085e54ee55223b72be6f7b0b07a014d22bc Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 10:57:07 -0400 Subject: [PATCH 25/37] also clean existing --- .../zkmigrations/CleanOldNodesMigration.java | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/CleanOldNodesMigration.java b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/CleanOldNodesMigration.java index a730725871..512b6436b4 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/CleanOldNodesMigration.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/CleanOldNodesMigration.java @@ -1,19 +1,27 @@ package com.hubspot.singularity.data.zkmigrations; import java.util.List; +import java.util.stream.Collectors; import org.apache.curator.framework.CuratorFramework; import com.google.common.collect.ImmutableList; import com.google.inject.Inject; +import com.hubspot.singularity.SingularityRequestGroup; +import com.hubspot.singularity.data.RequestGroupManager; +import com.hubspot.singularity.data.RequestManager; public class CleanOldNodesMigration extends ZkDataMigration { private final CuratorFramework curatorFramework; + private final RequestGroupManager requestGroupManager; + private final RequestManager requestManager; @Inject - public CleanOldNodesMigration(CuratorFramework curatorFramework) { + public CleanOldNodesMigration(CuratorFramework curatorFramework, RequestGroupManager requestGroupManager, RequestManager requestManager) { super(12); this.curatorFramework = curatorFramework; + this.requestGroupManager = requestGroupManager; + this.requestManager = requestManager; } @Override @@ -25,6 +33,24 @@ public void applyMigration() { curatorFramework.delete().deletingChildrenIfNeeded().forPath(node); } } + List allIds = requestManager.getAllRequestIds(); + for (SingularityRequestGroup requestGroup : requestGroupManager.getRequestGroups(false)) { + List ids = requestGroup.getRequestIds() + .stream() + .filter((id) -> allIds.contains(id)) + .collect(Collectors.toList()); + if (ids.isEmpty()) { + requestGroupManager.deleteRequestGroup(requestGroup.getId()); + } else { + if (!ids.equals(requestGroup.getRequestIds())) { + requestGroupManager.saveRequestGroup(new SingularityRequestGroup( + requestGroup.getId(), + ids, + requestGroup.getMetadata() + )); + } + } + } } catch (Throwable t) { throw new RuntimeException(t); } From f29b40205ecb58838df7e9fb40e82a5a53daa157 Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 10:57:53 -0400 Subject: [PATCH 26/37] cleaner --- .../singularity/data/zkmigrations/CleanOldNodesMigration.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/CleanOldNodesMigration.java b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/CleanOldNodesMigration.java index 512b6436b4..c5d7c421c8 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/CleanOldNodesMigration.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/CleanOldNodesMigration.java @@ -37,7 +37,7 @@ public void applyMigration() { for (SingularityRequestGroup requestGroup : requestGroupManager.getRequestGroups(false)) { List ids = requestGroup.getRequestIds() .stream() - .filter((id) -> allIds.contains(id)) + .filter(allIds::contains) .collect(Collectors.toList()); if (ids.isEmpty()) { requestGroupManager.deleteRequestGroup(requestGroup.getId()); From d84badd8aa12fa864dc0a54eb11b7deaabc773db Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 11:16:41 -0400 Subject: [PATCH 27/37] actually leader cache slave and rack data, and actually use it --- .../data/AbstractMachineManager.java | 20 +++++++++----- .../hubspot/singularity/data/RackManager.java | 24 ++++++++--------- .../singularity/data/SlaveManager.java | 27 ++++++++----------- .../scheduler/SingularityLeaderCache.java | 24 ++++++++++++++--- 4 files changed, 56 insertions(+), 39 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java index 20adaafd08..bfeb68bed1 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java @@ -52,9 +52,16 @@ public List getHistory(String objectId) { } public List getObjects() { - return getObjects(getRoot()); + List fromCache = getObjectsFromLeaderCache(); + if (fromCache != null) { + return fromCache; + } else { + return getObjectsNoCache(getRoot()); + } } + protected abstract List getObjectsFromLeaderCache(); + public List getObjectIds() { return getChildren(getRoot()); } @@ -113,13 +120,11 @@ public Optional getObject(String objectId) { return getData(getObjectPath(objectId), transcoder); } - protected List getObjects(String root) { + protected List getObjectsNoCache(String root) { return getAsyncChildren(root, transcoder); } - public SingularityDeleteResult removed(String objectId) { - return delete(getObjectPath(objectId)); - } + protected abstract void deleteFromLeaderCache(String objectId); public enum StateChangeResult { FAILURE_NOT_FOUND, FAILURE_ALREADY_AT_STATE, FAILURE_ILLEGAL_TRANSITION, SUCCESS; @@ -208,15 +213,18 @@ private SingularityCreateResult saveHistoryUpdate(SingularityMachineStateHistory } public SingularityDeleteResult deleteObject(String objectId) { + deleteFromLeaderCache(objectId); return delete(getObjectPath(objectId)); } public void saveObject(T object) { saveHistoryUpdate(object.getCurrentState()); - save(getObjectPath(object.getId()), object, transcoder); + saveObjectToLeaderCache(object); } + protected abstract void saveObjectToLeaderCache(T object); + private String getExpiringPath(String machineId) { return ZKPaths.makePath(getRoot(), EXPIRING_PATH, machineId); } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/RackManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/RackManager.java index 2c9565b482..01a5629c82 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/RackManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/RackManager.java @@ -1,12 +1,13 @@ package com.hubspot.singularity.data; +import java.util.List; + import org.apache.curator.framework.CuratorFramework; import com.codahale.metrics.MetricRegistry; import com.google.common.base.Optional; import com.google.inject.Inject; import com.google.inject.Singleton; -import com.hubspot.singularity.MachineState; import com.hubspot.singularity.SingularityMachineStateHistoryUpdate; import com.hubspot.singularity.SingularityRack; import com.hubspot.singularity.config.SingularityConfiguration; @@ -39,7 +40,7 @@ protected String getRoot() { } public void activateLeaderCache() { - leaderCache.cacheRacks(getObjects()); + leaderCache.cacheRacks(getObjectsNoCache(getRoot())); } public Optional getRack(String rackName) { @@ -51,21 +52,18 @@ public Optional getRack(String rackName) { } @Override - public int getNumActive() { - if (leaderCache.active()) { - return Math.toIntExact(leaderCache.getRacks().stream().filter(x -> x.getCurrentState().getState().equals(MachineState.ACTIVE)).count()); - } - - return super.getNumActive(); + public List getObjectsFromLeaderCache() { + return leaderCache.getRacks(); } @Override - public void saveObject(SingularityRack rack) { - if (leaderCache.active()) { - leaderCache.putRack(rack); - } + public void saveObjectToLeaderCache(SingularityRack rack) { + leaderCache.putRack(rack); + } - super.saveObject(rack); + @Override + public void deleteFromLeaderCache(String rackId) { + leaderCache.removeRack(rackId); } } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/SlaveManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/SlaveManager.java index a373fc7dd8..24f9b0c454 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/SlaveManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/SlaveManager.java @@ -1,12 +1,13 @@ package com.hubspot.singularity.data; +import java.util.List; + import org.apache.curator.framework.CuratorFramework; import com.codahale.metrics.MetricRegistry; import com.google.common.base.Optional; import com.google.inject.Inject; import com.google.inject.Singleton; -import com.hubspot.singularity.MachineState; import com.hubspot.singularity.SingularityMachineStateHistoryUpdate; import com.hubspot.singularity.SingularitySlave; import com.hubspot.singularity.config.SingularityConfiguration; @@ -38,7 +39,7 @@ protected String getRoot() { } public void activateLeaderCache() { - leaderCache.cacheSlaves(getObjects()); + leaderCache.cacheSlaves(getObjectsNoCache(getRoot())); } public Optional getSlave(String slaveId) { @@ -50,23 +51,17 @@ public Optional getSlave(String slaveId) { } @Override - public int getNumActive() { - if (leaderCache.active()) { - return Math.toIntExact(leaderCache.getSlaves().stream().filter(x -> x.getCurrentState().getState().equals(MachineState.ACTIVE)).count()); - } - - return super.getNumActive(); + public List getObjectsFromLeaderCache() { + return leaderCache.getSlaves(); } @Override - public void saveObject(SingularitySlave slave) { - if (leaderCache.active()) { - leaderCache.putSlave(slave); - } - - super.saveObject(slave); + public void saveObjectToLeaderCache(SingularitySlave singularitySlave) { + leaderCache.putSlave(singularitySlave); } - - + @Override + public void deleteFromLeaderCache(String slaveId) { + leaderCache.removeSlave(slaveId); + } } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityLeaderCache.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityLeaderCache.java index 001ae370b7..6c0ccb4d18 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityLeaderCache.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityLeaderCache.java @@ -409,8 +409,8 @@ public void deleteTaskHistory(SingularityTaskId taskId) { historyUpdates.remove(taskId); } - public Collection getSlaves() { - return slaves.values(); + public List getSlaves() { + return new ArrayList<>(slaves.values()); } public Optional getSlave(String slaveId) { @@ -425,8 +425,16 @@ public void putSlave(SingularitySlave slave) { slaves.put(slave.getId(), slave); } - public Collection getRacks() { - return racks.values(); + public void removeSlave(String slaveId) { + if (!active) { + LOG.warn("remove slave {}, but not active", slaveId); + return; + } + slaves.remove(slaveId); + } + + public List getRacks() { + return new ArrayList<>(racks.values()); } public Optional getRack(String rackName) { @@ -441,6 +449,14 @@ public void putRack(SingularityRack rack) { racks.put(rack.getId(), rack); } + public void removeRack(String rackId) { + if (!active) { + LOG.warn("remove rack {}, but not active", rackId); + return; + } + racks.remove(rackId); + } + public void putRequestUtilization(RequestUtilization requestUtilization) { if (!active) { LOG.warn("putRequestUtilization {}, but not active", requestUtilization); From 5e3b05d763631f9d38d98d711a9f708fce51bd8e Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 12:13:30 -0400 Subject: [PATCH 28/37] Add automatic cleanup of inactive hosts list --- .../config/SingularityConfiguration.java | 10 ++++++++++ .../singularity/data/InactiveSlaveManager.java | 11 +++++++++++ .../SingularitySlaveReconciliationPoller.java | 15 +++++++++++++-- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java b/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java index 21bb3a5c8b..d0650b603a 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java @@ -251,6 +251,8 @@ public class SingularityConfiguration extends Configuration { private long reconcileSlavesEveryMinutes = TimeUnit.HOURS.toMinutes(1); + private long cleanInactiveHostListEveryHours = 24; + @JsonProperty("s3") private S3Configuration s3Configuration; @@ -1219,6 +1221,14 @@ public void setReconcileSlavesEveryMinutes(long reconcileSlavesEveryMinutes) { this.reconcileSlavesEveryMinutes = reconcileSlavesEveryMinutes; } + public long getCleanInactiveHostListEveryHours() { + return cleanInactiveHostListEveryHours; + } + + public void setCleanInactiveHostListEveryHours(long cleanInactiveHostListEveryHours) { + this.cleanInactiveHostListEveryHours = cleanInactiveHostListEveryHours; + } + public long getCacheTasksForMillis() { return cacheTasksForMillis; } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/InactiveSlaveManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/InactiveSlaveManager.java index d6cc92da26..38be35c0ba 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/InactiveSlaveManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/InactiveSlaveManager.java @@ -4,8 +4,10 @@ import java.util.Set; import org.apache.curator.framework.CuratorFramework; +import org.apache.zookeeper.data.Stat; import com.codahale.metrics.MetricRegistry; +import com.google.common.base.Optional; import com.google.inject.Inject; import com.google.inject.Singleton; import com.hubspot.singularity.config.SingularityConfiguration; @@ -40,4 +42,13 @@ public boolean isInactive(String host) { private String pathOf(String host) { return String.format("%s/%s", ROOT_PATH, host); } + + public void cleanInactiveSlavesList(long thresholdTime) { + for (String host : getInactiveSlaves()) { + Optional stat = checkExists(pathOf(host)); + if (stat.isPresent() && stat.get().getMtime() < thresholdTime) { + delete(pathOf(host)); + } + } + } } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularitySlaveReconciliationPoller.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularitySlaveReconciliationPoller.java index e51ec37347..8ff9d14635 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularitySlaveReconciliationPoller.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularitySlaveReconciliationPoller.java @@ -12,6 +12,7 @@ import com.google.common.base.Optional; import com.google.inject.Inject; import com.hubspot.mesos.JavaUtils; +import com.hubspot.singularity.data.InactiveSlaveManager; import com.hubspot.singularity.helpers.MesosUtils; import com.hubspot.mesos.client.MesosClient; import com.hubspot.mesos.json.MesosMasterStateObject; @@ -33,8 +34,14 @@ public class SingularitySlaveReconciliationPoller extends SingularityLeaderOnlyP private final SingularitySlaveAndRackManager slaveAndRackManager; private final MesosClient mesosClient; private final SingularityMesosScheduler mesosScheduler; - - @Inject SingularitySlaveReconciliationPoller(SingularityConfiguration configuration, SlaveManager slaveManager, SingularitySlaveAndRackManager slaveAndRackManager, MesosClient mesosClient, SingularityMesosScheduler mesosScheduler) { + private final InactiveSlaveManager inactiveSlaveManager; + + @Inject SingularitySlaveReconciliationPoller(SingularityConfiguration configuration, + SlaveManager slaveManager, + SingularitySlaveAndRackManager slaveAndRackManager, + MesosClient mesosClient, + SingularityMesosScheduler mesosScheduler, + InactiveSlaveManager inactiveSlaveManager) { super(configuration.getReconcileSlavesEveryMinutes(), TimeUnit.MINUTES); this.slaveManager = slaveManager; @@ -42,12 +49,14 @@ public class SingularitySlaveReconciliationPoller extends SingularityLeaderOnlyP this.slaveAndRackManager = slaveAndRackManager; this.mesosClient = mesosClient; this.mesosScheduler = mesosScheduler; + this.inactiveSlaveManager = inactiveSlaveManager; } @Override public void runActionOnPoll() { refereshSlavesAndRacks(); checkDeadSlaves(); + inactiveSlaveManager.cleanInactiveSlavesList(System.currentTimeMillis() - TimeUnit.HOURS.toMillis(configuration.getCleanInactiveHostListEveryHours())); } private void refereshSlavesAndRacks() { @@ -89,6 +98,8 @@ private void checkDeadSlaves() { } } + + LOG.debug("Checked {} dead slaves, deleted {} in {}", deadSlaves.size(), deleted, JavaUtils.duration(start)); } From fa432ca981a296b8b27d6faec9a0c930890682b5 Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 13:50:02 -0400 Subject: [PATCH 29/37] clear old slave history entries as well --- .../config/SingularityConfiguration.java | 10 ++++++++++ .../singularity/data/AbstractMachineManager.java | 13 +++++++++++++ .../SingularitySlaveReconciliationPoller.java | 7 +++++++ 3 files changed, 30 insertions(+) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java b/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java index d0650b603a..bdacc111e5 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java @@ -146,6 +146,8 @@ public class SingularityConfiguration extends Configuration { private long deleteDeadSlavesAfterHours = TimeUnit.DAYS.toHours(7); + private int maxMachineHistoryEntries = 10; + private long deleteStaleRequestsFromZkWhenNoDatabaseAfterHours = TimeUnit.DAYS.toHours(14); private Optional maxRequestsWithHistoryInZkWhenNoDatabase = Optional.absent(); @@ -690,6 +692,14 @@ public void setDeleteDeadSlavesAfterHours(long deleteDeadSlavesAfterHours) { this.deleteDeadSlavesAfterHours = deleteDeadSlavesAfterHours; } + public int getMaxMachineHistoryEntries() { + return maxMachineHistoryEntries; + } + + public void setMaxMachineHistoryEntries(int maxMachineHistoryEntries) { + this.maxMachineHistoryEntries = maxMachineHistoryEntries; + } + public int getListenerThreadpoolSize() { return listenerThreadpoolSize; } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java index bfeb68bed1..183927eddf 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java @@ -1,5 +1,6 @@ package com.hubspot.singularity.data; +import java.util.Comparator; import java.util.List; import java.util.Map; @@ -31,6 +32,7 @@ public abstract class AbstractMachineManager transcoder; private final Transcoder historyTranscoder; private final Transcoder expiringMachineStateTranscoder; + private final int maxHistoryEntries; public AbstractMachineManager(CuratorFramework curator, SingularityConfiguration configuration, MetricRegistry metricRegistry, Transcoder transcoder, Transcoder historyTranscoder, Transcoder expiringMachineStateTranscoder) { @@ -39,6 +41,7 @@ public AbstractMachineManager(CuratorFramework curator, SingularityConfiguration this.transcoder = transcoder; this.historyTranscoder = historyTranscoder; this.expiringMachineStateTranscoder = expiringMachineStateTranscoder; + this.maxHistoryEntries = configuration.getMaxMachineHistoryEntries(); } protected abstract String getRoot(); @@ -208,6 +211,16 @@ private String getHistoryUpdatePath(SingularityMachineStateHistoryUpdate history return ZKPaths.makePath(getHistoryPath(historyUpdate.getObjectId()), historyChildPath); } + public void clearOldHistory(String machineId) { + List histories = getHistory(machineId); + histories.sort(Comparator.comparingLong(SingularityMachineStateHistoryUpdate::getTimestamp)); + histories.stream() + .skip(maxHistoryEntries) + .forEach((history) -> { + delete(getHistoryUpdatePath(history)); + }); + } + private SingularityCreateResult saveHistoryUpdate(SingularityMachineStateHistoryUpdate historyUpdate) { return create(getHistoryUpdatePath(historyUpdate), historyUpdate, historyTranscoder); } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularitySlaveReconciliationPoller.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularitySlaveReconciliationPoller.java index 8ff9d14635..c386318057 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularitySlaveReconciliationPoller.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularitySlaveReconciliationPoller.java @@ -57,6 +57,7 @@ public void runActionOnPoll() { refereshSlavesAndRacks(); checkDeadSlaves(); inactiveSlaveManager.cleanInactiveSlavesList(System.currentTimeMillis() - TimeUnit.HOURS.toMillis(configuration.getCleanInactiveHostListEveryHours())); + clearOldSlaveHistory(); } private void refereshSlavesAndRacks() { @@ -103,4 +104,10 @@ private void checkDeadSlaves() { LOG.debug("Checked {} dead slaves, deleted {} in {}", deadSlaves.size(), deleted, JavaUtils.duration(start)); } + private void clearOldSlaveHistory() { + for (SingularitySlave singularitySlave : slaveManager.getObjects()) { + slaveManager.clearOldHistory(singularitySlave.getId()); + } + } + } From 123a6699ca643cdc27b5c7a94b70aa78ded30e79 Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 14:19:53 -0400 Subject: [PATCH 30/37] remove task id bytes calc --- .../main/java/com/hubspot/singularity/data/TaskManager.java | 4 ---- .../com/hubspot/singularity/resources/MetricsResource.java | 1 - 2 files changed, 5 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java index 1cdc20928a..6263afb07e 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java @@ -1166,10 +1166,6 @@ public long getTaskStatusBytes() { return countBytes(getChildren(LAST_ACTIVE_TASK_STATUSES_PATH_ROOT)); } - public long getActiveTaskIdBytes() { - return countBytes(getChildren(ACTIVE_PATH_ROOT)); - } - public long getTaskHistoryIdBytes() { return countBytes(getChildren(HISTORY_PATH_ROOT)); } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/resources/MetricsResource.java b/SingularityService/src/main/java/com/hubspot/singularity/resources/MetricsResource.java index b8bfcdafd7..acf8b68291 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/resources/MetricsResource.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/resources/MetricsResource.java @@ -55,7 +55,6 @@ public Map getZkBytesMetrics() { return ImmutableMap.of( "allRequestIds", requestManager.getAllRequestIdsBytes(), "taskStatuses", taskManager.getTaskStatusBytes(), - "activeTaskIds", taskManager.getActiveTaskIdBytes(), "taskHistoryIds", taskManager.getTaskHistoryIdBytes() ); } From fe0565e879db75ef873ce5258c21f3d302e935cb Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 14:59:52 -0400 Subject: [PATCH 31/37] bump zk back down to 3.4.x --- pom.xml | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6088c0d7f9..832ee454ce 100644 --- a/pom.xml +++ b/pom.xml @@ -64,7 +64,7 @@ 1.3.8 1.7.25 2.0.0 - 3.5.4-beta + 3.4.10 1.3.5.1 1.3.7 0.1.1 @@ -382,6 +382,102 @@ + + org.apache.curator + curator-client + ${dep.curator.version} + + + com.google.guava + guava + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + + org.apache.zookeeper + zookeeper + + + + + + org.apache.curator + curator-framework + ${dep.curator.version} + + + com.google.guava + guava + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + + org.apache.zookeeper + zookeeper + + + + + + org.apache.curator + curator-recipes + ${dep.curator.version} + + + com.google.guava + guava + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + + org.apache.zookeeper + zookeeper + + + + + + org.apache.curator + curator-test + ${dep.curator.version} + + + com.google.guava + guava + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + + org.apache.zookeeper + zookeeper + + + + org.dmfs lib-recur From fc2e60adc8916eabb5b1b587880aab298d18da17 Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 15:12:06 -0400 Subject: [PATCH 32/37] older curator-test version --- pom.xml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 832ee454ce..14541fcd41 100644 --- a/pom.xml +++ b/pom.xml @@ -457,7 +457,11 @@ org.apache.curator curator-test - ${dep.curator.version} + + 2.13.0 com.google.guava From 1ef26f9d1d9a169c6f2d7774c1998b24beb0b510 Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 15:30:15 -0400 Subject: [PATCH 33/37] fix pending path parent calc --- .../java/com/hubspot/singularity/data/TaskManager.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java index 6263afb07e..513842dd93 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/TaskManager.java @@ -848,7 +848,12 @@ public void activateLeaderCache() { } private List fetchPendingTasks() { - return getAsyncNestedChildrenAsList(PENDING_PATH_ROOT, getChildren(PENDING_PATH_ROOT), pendingTaskTranscoder); + return getAsyncNestedChildrenAsList( + PENDING_PATH_ROOT, + getChildren(PENDING_PATH_ROOT).stream() + .map((p) -> ZKPaths.makePath(PENDING_PATH_ROOT, p)) + .collect(Collectors.toList()), + pendingTaskTranscoder); } public List getPendingTaskIds() { From 3148ed97b2f4b645277d60b49bd222843806133f Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 15:38:14 -0400 Subject: [PATCH 34/37] check zk migrations before filling leader cache --- .../singularity/mesos/SingularityMesosSchedulerImpl.java | 3 +++ .../com/hubspot/singularity/mesos/SingularityStartup.java | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityMesosSchedulerImpl.java b/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityMesosSchedulerImpl.java index 5728b866b3..aa9b14392d 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityMesosSchedulerImpl.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityMesosSchedulerImpl.java @@ -153,6 +153,9 @@ public void subscribed(Subscribed subscribed) { heartbeatIntervalSeconds = Optional.of(advertisedHeartbeatIntervalSeconds); } + // Should be called before activation of leader cache or cache could be left empty + startup.checkMigrations(); + leaderCacheCoordinator.activateLeaderCache(); MasterInfo newMasterInfo = subscribed.getMasterInfo(); masterInfo.set(newMasterInfo); diff --git a/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityStartup.java b/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityStartup.java index 5034d0b3ae..cd524f2d9d 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityStartup.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityStartup.java @@ -73,6 +73,10 @@ class SingularityStartup { this.taskReconciliation = taskReconciliation; } + public void checkMigrations() { + zkDataMigrationRunner.checkMigrations(); + } + public void startup(MasterInfo masterInfo) { final long start = System.currentTimeMillis(); @@ -80,8 +84,6 @@ public void startup(MasterInfo masterInfo) { LOG.info("Starting up... fetching state data from: " + uri); - zkDataMigrationRunner.checkMigrations(); - MesosMasterStateObject state = mesosClient.getMasterState(uri); slaveAndRackManager.loadSlavesAndRacksFromMaster(state, true); From 0131f002741f5082eedef21e25c759b20d1f5316 Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Mon, 22 Apr 2019 15:41:20 -0400 Subject: [PATCH 35/37] turn down test log level --- .../hubspot/singularity/scheduler/SingularityTestModule.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularityTestModule.java b/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularityTestModule.java index 6c568a5482..1b48bd11c9 100644 --- a/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularityTestModule.java +++ b/SingularityService/src/test/java/com/hubspot/singularity/scheduler/SingularityTestModule.java @@ -104,7 +104,7 @@ public SingularityTestModule(boolean useDbTests,Function Date: Mon, 22 Apr 2019 16:11:51 -0400 Subject: [PATCH 36/37] better fallback to zk from leader cache --- .../data/AbstractMachineManager.java | 11 +++++++ .../hubspot/singularity/data/RackManager.java | 30 ++++++++++++++----- .../singularity/data/SlaveManager.java | 25 ++++++++++++---- .../helpers/RebalancingHelper.java | 2 +- .../mesos/SingularitySlaveAndRackManager.java | 8 ++--- 5 files changed, 58 insertions(+), 18 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java index 183927eddf..f9586aa451 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java @@ -120,6 +120,17 @@ private String getObjectPath(String objectId) { } public Optional getObject(String objectId) { + Optional maybeCached = getObjectFromLeaderCache(objectId); + if(!maybeCached.isPresent()) { + return getObjectNoCache(objectId); + } else { + return maybeCached; + } + } + + protected abstract Optional getObjectFromLeaderCache(String objectId); + + public Optional getObjectNoCache(String objectId) { return getData(getObjectPath(objectId), transcoder); } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/RackManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/RackManager.java index 01a5629c82..ce08b4e52e 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/RackManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/RackManager.java @@ -3,6 +3,8 @@ import java.util.List; import org.apache.curator.framework.CuratorFramework; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.codahale.metrics.MetricRegistry; import com.google.common.base.Optional; @@ -17,6 +19,7 @@ @Singleton public class RackManager extends AbstractMachineManager { + private static final Logger LOG = LoggerFactory.getLogger(RackManager.class); private static final String RACK_ROOT = "/racks"; private final SingularityLeaderCache leaderCache; @@ -43,27 +46,38 @@ public void activateLeaderCache() { leaderCache.cacheRacks(getObjectsNoCache(getRoot())); } - public Optional getRack(String rackName) { + @Override + public Optional getObjectFromLeaderCache(String rackId) { if (leaderCache.active()) { - return leaderCache.getRack(rackName); + return leaderCache.getRack(rackId); } - return getObject(rackName); + return Optional.absent(); // fallback to zk } @Override public List getObjectsFromLeaderCache() { - return leaderCache.getRacks(); + if (leaderCache.active()) { + return leaderCache.getRacks(); + } + return null; // fallback to zk } @Override - public void saveObjectToLeaderCache(SingularityRack rack) { - leaderCache.putRack(rack); + public void saveObjectToLeaderCache(SingularityRack rackId) { + if (leaderCache.active()) { + leaderCache.putRack(rackId); + } else { + LOG.info("Asked to save slaves to leader cache when not active"); + } } @Override public void deleteFromLeaderCache(String rackId) { - leaderCache.removeRack(rackId); + if (leaderCache.active()) { + leaderCache.removeRack(rackId); + } else { + LOG.info("Asked to remove slave from leader cache when not active"); + } } - } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/SlaveManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/SlaveManager.java index 24f9b0c454..72daf661ae 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/SlaveManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/SlaveManager.java @@ -3,6 +3,8 @@ import java.util.List; import org.apache.curator.framework.CuratorFramework; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.codahale.metrics.MetricRegistry; import com.google.common.base.Optional; @@ -17,6 +19,7 @@ @Singleton public class SlaveManager extends AbstractMachineManager { + private static final Logger LOG = LoggerFactory.getLogger(SlaveManager.class); private static final String SLAVE_ROOT = "/slaves"; private final SingularityLeaderCache leaderCache; @@ -42,26 +45,38 @@ public void activateLeaderCache() { leaderCache.cacheSlaves(getObjectsNoCache(getRoot())); } - public Optional getSlave(String slaveId) { + @Override + public Optional getObjectFromLeaderCache(String slaveId) { if (leaderCache.active()) { return leaderCache.getSlave(slaveId); } - return getObject(slaveId); + return Optional.absent(); // fallback to zk } @Override public List getObjectsFromLeaderCache() { - return leaderCache.getSlaves(); + if (leaderCache.active()) { + return leaderCache.getSlaves(); + } + return null; // fallback to zk } @Override public void saveObjectToLeaderCache(SingularitySlave singularitySlave) { - leaderCache.putSlave(singularitySlave); + if (leaderCache.active()) { + leaderCache.putSlave(singularitySlave); + } else { + LOG.info("Asked to save slaves to leader cache when not active"); + } } @Override public void deleteFromLeaderCache(String slaveId) { - leaderCache.removeSlave(slaveId); + if (leaderCache.active()) { + leaderCache.removeSlave(slaveId); + } else { + LOG.info("Asked to remove slave from leader cache when not active"); + } } } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/helpers/RebalancingHelper.java b/SingularityService/src/main/java/com/hubspot/singularity/helpers/RebalancingHelper.java index fad7090d32..fb213aa16a 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/helpers/RebalancingHelper.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/helpers/RebalancingHelper.java @@ -70,7 +70,7 @@ public Set rebalanceAttributeDistribution( Map>> attributeTaskMap = new HashMap<>(); for (SingularityTaskId taskId : remainingActiveTasks) { - SingularitySlave slave = slaveManager.getSlave(taskManager.getTask(taskId).get().getMesosTask().getSlaveId().getValue()).get(); + SingularitySlave slave = slaveManager.getObject(taskManager.getTask(taskId).get().getMesosTask().getSlaveId().getValue()).get(); for (Entry entry : slave.getAttributes().entrySet()) { attributeTaskMap .computeIfAbsent(entry.getKey(), key -> new HashMap<>()) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularitySlaveAndRackManager.java b/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularitySlaveAndRackManager.java index e888cfc06f..9e824910ba 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularitySlaveAndRackManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularitySlaveAndRackManager.java @@ -88,7 +88,7 @@ SlaveMatchState doesOfferMatch(SingularityOfferHolder offerHolder, SingularityTa final String rackId = offerHolder.getRackId(); final String slaveId = offerHolder.getSlaveId(); - final MachineState currentSlaveState = slaveManager.getSlave(slaveId).get().getCurrentState().getState(); + final MachineState currentSlaveState = slaveManager.getObject(slaveId).get().getCurrentState().getState(); if (currentSlaveState == MachineState.FROZEN) { return SlaveMatchState.SLAVE_FROZEN; @@ -98,7 +98,7 @@ SlaveMatchState doesOfferMatch(SingularityOfferHolder offerHolder, SingularityTa return SlaveMatchState.SLAVE_DECOMMISSIONING; } - final MachineState currentRackState = rackManager.getRack(rackId).get().getCurrentState().getState(); + final MachineState currentRackState = rackManager.getObject(rackId).get().getCurrentState().getState(); if (currentRackState == MachineState.FROZEN) { return SlaveMatchState.RACK_FROZEN; @@ -308,7 +308,7 @@ private boolean areSlaveAttributeMinimumsFeasible(SingularityOfferHolder offerHo if (!taskRequest.getRequest().getSlaveAttributeMinimums().isPresent()) { return true; } - Map offerAttributes = slaveManager.getSlave(offerHolder.getSlaveId()).get().getAttributes(); + Map offerAttributes = slaveManager.getObject(offerHolder.getSlaveId()).get().getAttributes(); Integer numDesiredInstances = taskRequest.getRequest().getInstancesSafe(); Integer numActiveInstances = activeTaskIdsForRequest.size(); @@ -537,7 +537,7 @@ public CheckResult checkOffer(Offer offer) { } void checkStateAfterFinishedTask(SingularityTaskId taskId, String slaveId, SingularityLeaderCache leaderCache) { - Optional slave = slaveManager.getSlave(slaveId); + Optional slave = slaveManager.getObject(slaveId); if (!slave.isPresent()) { final String message = String.format("Couldn't find slave with id %s for task %s", slaveId, taskId); From e7231c7a316a876012b14fb2a37ab134a3b36bd2 Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Thu, 25 Apr 2019 08:33:02 -0400 Subject: [PATCH 37/37] fix for PR comments --- .../data/AbstractMachineManager.java | 2 +- .../NamespaceActiveTasksMigration.java | 27 ++++++++++++------- .../NamespacePendingTasksMigration.java | 27 ++++++++++++------- .../SingularityHealthcheckAsyncHandler.java | 2 +- .../scheduler/SingularityHealthchecker.java | 2 +- .../scheduler/SingularityLeaderCache.java | 4 +-- 6 files changed, 41 insertions(+), 23 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java b/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java index f9586aa451..31fafed18b 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/AbstractMachineManager.java @@ -224,7 +224,7 @@ private String getHistoryUpdatePath(SingularityMachineStateHistoryUpdate history public void clearOldHistory(String machineId) { List histories = getHistory(machineId); - histories.sort(Comparator.comparingLong(SingularityMachineStateHistoryUpdate::getTimestamp)); + histories.sort(Comparator.comparingLong(SingularityMachineStateHistoryUpdate::getTimestamp).reversed()); histories.stream() .skip(maxHistoryEntries) .forEach((history) -> { diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespaceActiveTasksMigration.java b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespaceActiveTasksMigration.java index 3f317b6838..46cee8e916 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespaceActiveTasksMigration.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespaceActiveTasksMigration.java @@ -4,11 +4,16 @@ import org.apache.curator.framework.CuratorFramework; import org.apache.curator.utils.ZKPaths; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.inject.Inject; +import com.hubspot.singularity.InvalidSingularityTaskIdException; import com.hubspot.singularity.SingularityTaskId; public class NamespaceActiveTasksMigration extends ZkDataMigration { + private static final Logger LOG = LoggerFactory.getLogger(NamespaceActiveTasksMigration.class); + private static final String ACTIVE_TASKS_ROOT = "/tasks/active"; private static final String ACTIVE_STATUSES_ROOT = "/tasks/statuses"; @@ -26,16 +31,20 @@ public void applyMigration() { if (curatorFramework.checkExists().forPath(ACTIVE_TASKS_ROOT) != null) { List currentActive = curatorFramework.getChildren().forPath(ACTIVE_STATUSES_ROOT); for (String taskIdString : currentActive) { - SingularityTaskId taskId = SingularityTaskId.valueOf(taskIdString); - String oldPath = ZKPaths.makePath(ACTIVE_STATUSES_ROOT, taskIdString); - byte[] oldData = curatorFramework.getData().forPath(oldPath); - String newPath = ZKPaths.makePath(ACTIVE_STATUSES_ROOT, taskId.getRequestId(), taskIdString); - if (curatorFramework.checkExists().forPath(newPath) != null) { - curatorFramework.setData().forPath(newPath, oldData); - } else { - curatorFramework.create().creatingParentsIfNeeded().forPath(newPath, oldData); + try { + SingularityTaskId taskId = SingularityTaskId.valueOf(taskIdString); + String oldPath = ZKPaths.makePath(ACTIVE_STATUSES_ROOT, taskIdString); + byte[] oldData = curatorFramework.getData().forPath(oldPath); + String newPath = ZKPaths.makePath(ACTIVE_STATUSES_ROOT, taskId.getRequestId(), taskIdString); + if (curatorFramework.checkExists().forPath(newPath) != null) { + curatorFramework.setData().forPath(newPath, oldData); + } else { + curatorFramework.create().creatingParentsIfNeeded().forPath(newPath, oldData); + } + curatorFramework.delete().forPath(oldPath); + } catch (InvalidSingularityTaskIdException e) { + LOG.warn("Found invalid task id {}. This is likely because the migration did not finish successfully on a previous run. Will continue to migrate additional nodes", taskIdString); } - curatorFramework.delete().forPath(oldPath); } curatorFramework.delete().deletingChildrenIfNeeded().forPath(ACTIVE_TASKS_ROOT); diff --git a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespacePendingTasksMigration.java b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespacePendingTasksMigration.java index 6573fc3dc0..bf5e147d26 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespacePendingTasksMigration.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/data/zkmigrations/NamespacePendingTasksMigration.java @@ -4,11 +4,16 @@ import org.apache.curator.framework.CuratorFramework; import org.apache.curator.utils.ZKPaths; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.inject.Inject; +import com.hubspot.singularity.InvalidSingularityTaskIdException; import com.hubspot.singularity.SingularityPendingTaskId; public class NamespacePendingTasksMigration extends ZkDataMigration { + private static final Logger LOG = LoggerFactory.getLogger(NamespacePendingTasksMigration.class); + private static final String PENDING_TASK_ROOT = "/tasks/scheduled"; private final CuratorFramework curatorFramework; @@ -25,16 +30,20 @@ public void applyMigration() { if (curatorFramework.checkExists().forPath(PENDING_TASK_ROOT) != null) { List currentPendingTasks = curatorFramework.getChildren().forPath(PENDING_TASK_ROOT); for (String taskIdString : currentPendingTasks) { - SingularityPendingTaskId pendingTaskId = SingularityPendingTaskId.valueOf(taskIdString); - String oldPath = ZKPaths.makePath(PENDING_TASK_ROOT, taskIdString); - byte[] oldData = curatorFramework.getData().forPath(oldPath); - String newPath = ZKPaths.makePath(PENDING_TASK_ROOT, pendingTaskId.getRequestId(), taskIdString); - if (curatorFramework.checkExists().forPath(newPath) != null) { - curatorFramework.setData().forPath(newPath, oldData); - } else { - curatorFramework.create().creatingParentsIfNeeded().forPath(newPath, oldData); + try { + SingularityPendingTaskId pendingTaskId = SingularityPendingTaskId.valueOf(taskIdString); + String oldPath = ZKPaths.makePath(PENDING_TASK_ROOT, taskIdString); + byte[] oldData = curatorFramework.getData().forPath(oldPath); + String newPath = ZKPaths.makePath(PENDING_TASK_ROOT, pendingTaskId.getRequestId(), taskIdString); + if (curatorFramework.checkExists().forPath(newPath) != null) { + curatorFramework.setData().forPath(newPath, oldData); + } else { + curatorFramework.create().creatingParentsIfNeeded().forPath(newPath, oldData); + } + curatorFramework.delete().forPath(oldPath); + } catch (InvalidSingularityTaskIdException e) { + LOG.warn("Found invalid task id {}, will skip", taskIdString); } - curatorFramework.delete().forPath(oldPath); } } } catch (Throwable t) { diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java index 0f8d514788..75e2eab125 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthcheckAsyncHandler.java @@ -41,7 +41,7 @@ public SingularityHealthcheckAsyncHandler(SingularityExceptionNotifier exception startTime = System.currentTimeMillis(); } - public void setHealthcehckUri(String healthcheckUri) { + public void setHealthcheckUri(String healthcheckUri) { this.healthcheckUri = healthcheckUri; } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java index 64e2a16f2e..4745c1e356 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityHealthchecker.java @@ -337,7 +337,7 @@ private void asyncHealthcheck(final SingularityTask task) { saveFailure(handler, "Invalid healthcheck uri or ports not present"); return; } - handler.setHealthcehckUri(uri.get()); + handler.setHealthcheckUri(uri.get()); final Integer timeoutSeconds; final String method; diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityLeaderCache.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityLeaderCache.java index 6c0ccb4d18..9d8c2c1dfd 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityLeaderCache.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityLeaderCache.java @@ -437,8 +437,8 @@ public List getRacks() { return new ArrayList<>(racks.values()); } - public Optional getRack(String rackName) { - return Optional.fromNullable(racks.get(rackName)); + public Optional getRack(String rackId) { + return Optional.fromNullable(racks.get(rackId)); } public void putRack(SingularityRack rack) {