From b211060a0c376b292df30edc7590f8a81826c6ac Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Thu, 6 Jun 2024 08:52:39 +0200 Subject: [PATCH] Handle Pod decorator failure by aborting the pending queue item. (#1570) --- .../kubernetes/KubernetesLauncher.java | 31 +++++++++++++++---- .../plugins/kubernetes/KubernetesSlave.java | 8 +++-- .../plugins/kubernetes/PodTemplate.java | 17 ++++++++++ .../jenkins/plugins/kubernetes/PodUtils.java | 17 ++++++++-- .../pipeline/PodTemplateStepExecution.java | 2 ++ .../pod/decorator/PodDecorator.java | 9 +++++- .../pod/decorator/PodDecoratorException.java | 14 +++++++++ .../pipeline/KubernetesPipelineTest.java | 21 +++++++++++++ .../pipeline/decoratorFailure.groovy | 5 +++ 9 files changed, 112 insertions(+), 12 deletions(-) create mode 100644 src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/decorator/PodDecoratorException.java create mode 100644 src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/decoratorFailure.groovy diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java index e8f3cfa87d..93c2d34a2b 100644 --- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java @@ -32,6 +32,7 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import hudson.Functions; import hudson.model.Descriptor; +import hudson.model.Run; import hudson.model.TaskListener; import hudson.slaves.ComputerLauncher; import hudson.slaves.JNLPLauncher; @@ -54,6 +55,7 @@ import java.util.stream.Collectors; import jenkins.metrics.api.Metrics; import org.apache.commons.lang.StringUtils; +import org.csanchez.jenkins.plugins.kubernetes.pod.decorator.PodDecoratorException; import org.csanchez.jenkins.plugins.kubernetes.pod.retention.Reaper; import org.kohsuke.stapler.DataBoundConstructor; @@ -117,7 +119,20 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) { PodTemplate template = node.getTemplate(); KubernetesCloud cloud = node.getKubernetesCloud(); KubernetesClient client = cloud.connect(); - Pod pod = template.build(node); + Pod pod; + try { + pod = template.build(node); + } catch (PodDecoratorException e) { + Run run = template.getRun(); + if (run != null) { + template.getListener().getLogger().println("Failed to build pod definition : " + e.getMessage()); + PodUtils.cancelQueueItemFor(run.getUrl(), template.getLabel(), e.getMessage(), null); + } + e.printStackTrace(listener.fatalError("Failed to build pod definition")); + setProblem(e); + terminateOrLog(node); + return; + } node.assignPod(pod); String podName = pod.getMetadata().getName(); @@ -305,15 +320,19 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) { String.format("Error in provisioning; agent=%s, template=%s", node, node.getTemplateId()), ex); LOGGER.log(Level.FINER, "Removing Jenkins node: {0}", node.getNodeName()); - try { - node.terminate(); - } catch (IOException | InterruptedException e) { - LOGGER.log(Level.WARNING, "Unable to remove Jenkins node", e); - } + terminateOrLog(node); throw new RuntimeException(ex); } } + private static void terminateOrLog(KubernetesSlave node) { + try { + node.terminate(); + } catch (IOException | InterruptedException e) { + LOGGER.log(Level.WARNING, "Unable to remove Jenkins node", e); + } + } + private void checkTerminatedContainers( List terminatedContainers, String podId, diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesSlave.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesSlave.java index bfc09685a0..5837dc4a0d 100644 --- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesSlave.java +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesSlave.java @@ -4,6 +4,7 @@ import edu.umd.cs.findbugs.annotations.CheckForNull; import edu.umd.cs.findbugs.annotations.NonNull; +import edu.umd.cs.findbugs.annotations.Nullable; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import hudson.Extension; import hudson.FilePath; @@ -221,7 +222,7 @@ public void setNamespace(@NonNull String namespace) { this.namespace = namespace; } - @NonNull + @Nullable public String getNamespace() { return namespace; } @@ -423,7 +424,10 @@ name, getPodRetention(cloud) listener.getLogger().println(msg); } - private void deleteSlavePod(TaskListener listener, KubernetesClient client) throws IOException { + private void deleteSlavePod(TaskListener listener, KubernetesClient client) { + if (getNamespace() == null) { + return; + } try { boolean deleted = client.pods() .inNamespace(getNamespace()) diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodTemplate.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodTemplate.java index 9281221d27..e4c3e385fb 100644 --- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodTemplate.java +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodTemplate.java @@ -10,6 +10,7 @@ import hudson.model.DescriptorVisibilityFilter; import hudson.model.Label; import hudson.model.Node; +import hudson.model.Run; import hudson.model.Saveable; import hudson.model.TaskListener; import hudson.model.labels.LabelAtom; @@ -80,6 +81,13 @@ public class PodTemplate extends AbstractDescribableImpl implements public static final String JENKINS_LABEL = "jenkins/label"; public static final String JENKINS_LABEL_DIGEST = "jenkins/label-digest"; + /** + * The run this pod template is associated with. + * Only applicable to pod templates defined by the `podTemplate` step. + */ + @CheckForNull + private transient Run run; + /** * Digest function that is used to compute the kubernetes label "jenkins/label-digest" * Not used for security. @@ -1059,6 +1067,15 @@ public void setReadonlyFromUi(boolean readonlyFromUi) { this.readonlyFromUi = readonlyFromUi; } + public void setRun(Run run) { + this.run = run; + } + + @CheckForNull + public Run getRun() { + return run; + } + @Extension public static class DescriptorImpl extends Descriptor { diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodUtils.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodUtils.java index 64567f6f89..7197f5d3bf 100644 --- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodUtils.java +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodUtils.java @@ -102,13 +102,20 @@ public static void cancelQueueItemFor(Pod pod, String reason) { LOGGER.log(Level.FINE, () -> "Pod " + podDisplayName + " .metadata.labels is null"); return; } - var jenkinsLabel = labels.get(PodTemplate.JENKINS_LABEL); + cancelQueueItemFor(runUrl, labels.get(PodTemplate.JENKINS_LABEL), reason, podDisplayName); + } + + public static void cancelQueueItemFor( + @NonNull String runUrl, + @NonNull String label, + @CheckForNull String reason, + @CheckForNull String podDisplayName) { var queue = Jenkins.get().getQueue(); Arrays.stream(queue.getItems()) .filter(item -> item.getTask().getUrl().equals(runUrl)) .filter(item -> Optional.ofNullable(item.getAssignedLabel()) .map(Label::getName) - .map(name -> PodTemplateUtils.sanitizeLabel(name).equals(jenkinsLabel)) + .map(name -> PodTemplateUtils.sanitizeLabel(name).equals(label)) .orElse(false)) .findFirst() .ifPresentOrElse( @@ -119,7 +126,11 @@ public static void cancelQueueItemFor(Pod pod, String reason) { + (!StringUtils.isBlank(reason) ? "due to " + reason : "")); queue.cancel(item); }, - () -> LOGGER.log(Level.FINE, () -> "No queue item found for pod " + podDisplayName)); + () -> { + if (podDisplayName != null) { + LOGGER.log(Level.FINE, () -> "No queue item found for pod " + podDisplayName); + } + }); } @CheckForNull diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/PodTemplateStepExecution.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/PodTemplateStepExecution.java index 25f2690bad..4e2d957837 100755 --- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/PodTemplateStepExecution.java +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/PodTemplateStepExecution.java @@ -132,6 +132,7 @@ public boolean start() throws Exception { newTemplate.getAnnotations().add(new PodAnnotation(POD_ANNOTATION_BUILD_URL, url + run.getUrl())); newTemplate.getAnnotations().add(new PodAnnotation(POD_ANNOTATION_RUN_URL, run.getUrl())); } + newTemplate.setRun(run); } newTemplate.setImagePullSecrets(step.getImagePullSecrets().stream() .map(x -> new PodImagePullSecret(x)) @@ -240,6 +241,7 @@ public void onResume() { KubernetesCloud cloud = resolveCloud(cloudName); TaskListener listener = getContext().get(TaskListener.class); newTemplate.setListener(listener); + newTemplate.setRun(getContext().get(Run.class)); LOGGER.log(Level.FINE, "Re-registering template with id=" + newTemplate.getId() + " after resume"); if (VERBOSE) { listener.getLogger() diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/decorator/PodDecorator.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/decorator/PodDecorator.java index ff9ca8ea7a..23967fdaf1 100644 --- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/decorator/PodDecorator.java +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/decorator/PodDecorator.java @@ -11,8 +11,15 @@ */ public interface PodDecorator extends ExtensionPoint { + /** + * Goes through all the {@link PodDecorator} extensions and decorates the pod. + * @param kubernetesCloud The cloud this pod will be scheduled as context. + * @param pod the initial pod definition before decoration. + * @return The modified pod definition. + * @throws PodDecoratorException Should any of the decorators fail to decorate the pod. + */ @NonNull - static Pod decorateAll(@NonNull KubernetesCloud kubernetesCloud, @NonNull Pod pod) { + static Pod decorateAll(@NonNull KubernetesCloud kubernetesCloud, @NonNull Pod pod) throws PodDecoratorException { for (PodDecorator decorator : ExtensionList.lookup(PodDecorator.class)) { pod = decorator.decorate(kubernetesCloud, pod); } diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/decorator/PodDecoratorException.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/decorator/PodDecoratorException.java new file mode 100644 index 0000000000..2f0ab1a8b3 --- /dev/null +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/decorator/PodDecoratorException.java @@ -0,0 +1,14 @@ +package org.csanchez.jenkins.plugins.kubernetes.pod.decorator; + +/** + * A fatal exception raised by a {@link PodDecorator} implementation. + */ +public class PodDecoratorException extends RuntimeException { + public PodDecoratorException(String message) { + super(message); + } + + public PodDecoratorException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java index 29dbe2ef79..ca7041ee51 100644 --- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java +++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java @@ -73,6 +73,7 @@ import jenkins.metrics.api.Metrics; import jenkins.model.Jenkins; import org.csanchez.jenkins.plugins.kubernetes.GarbageCollection; +import org.csanchez.jenkins.plugins.kubernetes.KubernetesCloud; import org.csanchez.jenkins.plugins.kubernetes.KubernetesComputer; import org.csanchez.jenkins.plugins.kubernetes.KubernetesSlave; import org.csanchez.jenkins.plugins.kubernetes.KubernetesTestUtil; @@ -80,6 +81,8 @@ import org.csanchez.jenkins.plugins.kubernetes.PodAnnotation; import org.csanchez.jenkins.plugins.kubernetes.PodTemplate; import org.csanchez.jenkins.plugins.kubernetes.PodTemplateUtils; +import org.csanchez.jenkins.plugins.kubernetes.pod.decorator.PodDecorator; +import org.csanchez.jenkins.plugins.kubernetes.pod.decorator.PodDecoratorException; import org.hamcrest.MatcherAssert; import org.htmlunit.html.DomNodeUtil; import org.htmlunit.html.HtmlElement; @@ -90,6 +93,7 @@ import org.jenkinsci.plugins.workflow.job.WorkflowRun; import org.jenkinsci.plugins.workflow.steps.durable_task.DurableTaskStep; import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep; +import org.jetbrains.annotations.NotNull; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -103,6 +107,7 @@ import org.jvnet.hudson.test.JenkinsRuleNonLocalhost; import org.jvnet.hudson.test.LoggerRule; import org.jvnet.hudson.test.MockAuthorizationStrategy; +import org.jvnet.hudson.test.TestExtension; public class KubernetesPipelineTest extends AbstractKubernetesPipelineTest { @@ -928,4 +933,20 @@ public void handleEviction() throws Exception { SemaphoreStep.success("pod/2", null); r.assertBuildStatusSuccess(r.waitForCompletion(b)); } + + @Test + public void decoratorFailure() throws Exception { + r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b)); + r.assertLogContains("I always fail", b); + assertThat("Node should have been removed", r.jenkins.getNodes(), empty()); + } + + @TestExtension("decoratorFailure") + public static class DecoratorImpl implements PodDecorator { + @NotNull + @Override + public Pod decorate(@NotNull KubernetesCloud kubernetesCloud, @NotNull Pod pod) { + throw new PodDecoratorException("I always fail"); + } + } } diff --git a/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/decoratorFailure.groovy b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/decoratorFailure.groovy new file mode 100644 index 0000000000..08095fefdb --- /dev/null +++ b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/decoratorFailure.groovy @@ -0,0 +1,5 @@ +podTemplate { + node(POD_LABEL) { + sh 'true' + } +}