Skip to content

Commit

Permalink
Handle Pod decorator failure by aborting the pending queue item. (#1570)
Browse files Browse the repository at this point in the history
  • Loading branch information
Vlatombe authored Jun 6, 2024
1 parent 28c157c commit b211060
Show file tree
Hide file tree
Showing 9 changed files with 112 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import hudson.Functions;
import hudson.model.Descriptor;
import hudson.model.Run;
import hudson.model.TaskListener;
import hudson.slaves.ComputerLauncher;
import hudson.slaves.JNLPLauncher;
Expand All @@ -54,6 +55,7 @@
import java.util.stream.Collectors;
import jenkins.metrics.api.Metrics;
import org.apache.commons.lang.StringUtils;
import org.csanchez.jenkins.plugins.kubernetes.pod.decorator.PodDecoratorException;
import org.csanchez.jenkins.plugins.kubernetes.pod.retention.Reaper;
import org.kohsuke.stapler.DataBoundConstructor;

Expand Down Expand Up @@ -117,7 +119,20 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) {
PodTemplate template = node.getTemplate();
KubernetesCloud cloud = node.getKubernetesCloud();
KubernetesClient client = cloud.connect();
Pod pod = template.build(node);
Pod pod;
try {
pod = template.build(node);
} catch (PodDecoratorException e) {
Run<?, ?> run = template.getRun();
if (run != null) {
template.getListener().getLogger().println("Failed to build pod definition : " + e.getMessage());
PodUtils.cancelQueueItemFor(run.getUrl(), template.getLabel(), e.getMessage(), null);
}
e.printStackTrace(listener.fatalError("Failed to build pod definition"));
setProblem(e);
terminateOrLog(node);
return;
}
node.assignPod(pod);

String podName = pod.getMetadata().getName();
Expand Down Expand Up @@ -305,15 +320,19 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) {
String.format("Error in provisioning; agent=%s, template=%s", node, node.getTemplateId()),
ex);
LOGGER.log(Level.FINER, "Removing Jenkins node: {0}", node.getNodeName());
try {
node.terminate();
} catch (IOException | InterruptedException e) {
LOGGER.log(Level.WARNING, "Unable to remove Jenkins node", e);
}
terminateOrLog(node);
throw new RuntimeException(ex);
}
}

private static void terminateOrLog(KubernetesSlave node) {
try {
node.terminate();
} catch (IOException | InterruptedException e) {
LOGGER.log(Level.WARNING, "Unable to remove Jenkins node", e);
}
}

private void checkTerminatedContainers(
List<ContainerStatus> terminatedContainers,
String podId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import edu.umd.cs.findbugs.annotations.CheckForNull;
import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import hudson.Extension;
import hudson.FilePath;
Expand Down Expand Up @@ -221,7 +222,7 @@ public void setNamespace(@NonNull String namespace) {
this.namespace = namespace;
}

@NonNull
@Nullable
public String getNamespace() {
return namespace;
}
Expand Down Expand Up @@ -423,7 +424,10 @@ name, getPodRetention(cloud)
listener.getLogger().println(msg);
}

private void deleteSlavePod(TaskListener listener, KubernetesClient client) throws IOException {
private void deleteSlavePod(TaskListener listener, KubernetesClient client) {
if (getNamespace() == null) {
return;
}
try {
boolean deleted = client.pods()
.inNamespace(getNamespace())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import hudson.model.DescriptorVisibilityFilter;
import hudson.model.Label;
import hudson.model.Node;
import hudson.model.Run;
import hudson.model.Saveable;
import hudson.model.TaskListener;
import hudson.model.labels.LabelAtom;
Expand Down Expand Up @@ -80,6 +81,13 @@ public class PodTemplate extends AbstractDescribableImpl<PodTemplate> implements
public static final String JENKINS_LABEL = "jenkins/label";
public static final String JENKINS_LABEL_DIGEST = "jenkins/label-digest";

/**
* The run this pod template is associated with.
* Only applicable to pod templates defined by the `podTemplate` step.
*/
@CheckForNull
private transient Run<?, ?> run;

/**
* Digest function that is used to compute the kubernetes label "jenkins/label-digest"
* Not used for security.
Expand Down Expand Up @@ -1059,6 +1067,15 @@ public void setReadonlyFromUi(boolean readonlyFromUi) {
this.readonlyFromUi = readonlyFromUi;
}

public void setRun(Run<?, ?> run) {
this.run = run;
}

@CheckForNull
public Run<?, ?> getRun() {
return run;
}

@Extension
public static class DescriptorImpl extends Descriptor<PodTemplate> {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,20 @@ public static void cancelQueueItemFor(Pod pod, String reason) {
LOGGER.log(Level.FINE, () -> "Pod " + podDisplayName + " .metadata.labels is null");
return;
}
var jenkinsLabel = labels.get(PodTemplate.JENKINS_LABEL);
cancelQueueItemFor(runUrl, labels.get(PodTemplate.JENKINS_LABEL), reason, podDisplayName);
}

public static void cancelQueueItemFor(
@NonNull String runUrl,
@NonNull String label,
@CheckForNull String reason,
@CheckForNull String podDisplayName) {
var queue = Jenkins.get().getQueue();
Arrays.stream(queue.getItems())
.filter(item -> item.getTask().getUrl().equals(runUrl))
.filter(item -> Optional.ofNullable(item.getAssignedLabel())
.map(Label::getName)
.map(name -> PodTemplateUtils.sanitizeLabel(name).equals(jenkinsLabel))
.map(name -> PodTemplateUtils.sanitizeLabel(name).equals(label))
.orElse(false))
.findFirst()
.ifPresentOrElse(
Expand All @@ -119,7 +126,11 @@ public static void cancelQueueItemFor(Pod pod, String reason) {
+ (!StringUtils.isBlank(reason) ? "due to " + reason : ""));
queue.cancel(item);
},
() -> LOGGER.log(Level.FINE, () -> "No queue item found for pod " + podDisplayName));
() -> {
if (podDisplayName != null) {
LOGGER.log(Level.FINE, () -> "No queue item found for pod " + podDisplayName);
}
});
}

@CheckForNull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ public boolean start() throws Exception {
newTemplate.getAnnotations().add(new PodAnnotation(POD_ANNOTATION_BUILD_URL, url + run.getUrl()));
newTemplate.getAnnotations().add(new PodAnnotation(POD_ANNOTATION_RUN_URL, run.getUrl()));
}
newTemplate.setRun(run);
}
newTemplate.setImagePullSecrets(step.getImagePullSecrets().stream()
.map(x -> new PodImagePullSecret(x))
Expand Down Expand Up @@ -240,6 +241,7 @@ public void onResume() {
KubernetesCloud cloud = resolveCloud(cloudName);
TaskListener listener = getContext().get(TaskListener.class);
newTemplate.setListener(listener);
newTemplate.setRun(getContext().get(Run.class));
LOGGER.log(Level.FINE, "Re-registering template with id=" + newTemplate.getId() + " after resume");
if (VERBOSE) {
listener.getLogger()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,15 @@
*/
public interface PodDecorator extends ExtensionPoint {

/**
* Goes through all the {@link PodDecorator} extensions and decorates the pod.
* @param kubernetesCloud The cloud this pod will be scheduled as context.
* @param pod the initial pod definition before decoration.
* @return The modified pod definition.
* @throws PodDecoratorException Should any of the decorators fail to decorate the pod.
*/
@NonNull
static Pod decorateAll(@NonNull KubernetesCloud kubernetesCloud, @NonNull Pod pod) {
static Pod decorateAll(@NonNull KubernetesCloud kubernetesCloud, @NonNull Pod pod) throws PodDecoratorException {
for (PodDecorator decorator : ExtensionList.lookup(PodDecorator.class)) {
pod = decorator.decorate(kubernetesCloud, pod);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package org.csanchez.jenkins.plugins.kubernetes.pod.decorator;

/**
* A fatal exception raised by a {@link PodDecorator} implementation.
*/
public class PodDecoratorException extends RuntimeException {
public PodDecoratorException(String message) {
super(message);
}

public PodDecoratorException(String message, Throwable cause) {
super(message, cause);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,16 @@
import jenkins.metrics.api.Metrics;
import jenkins.model.Jenkins;
import org.csanchez.jenkins.plugins.kubernetes.GarbageCollection;
import org.csanchez.jenkins.plugins.kubernetes.KubernetesCloud;
import org.csanchez.jenkins.plugins.kubernetes.KubernetesComputer;
import org.csanchez.jenkins.plugins.kubernetes.KubernetesSlave;
import org.csanchez.jenkins.plugins.kubernetes.KubernetesTestUtil;
import org.csanchez.jenkins.plugins.kubernetes.MetricNames;
import org.csanchez.jenkins.plugins.kubernetes.PodAnnotation;
import org.csanchez.jenkins.plugins.kubernetes.PodTemplate;
import org.csanchez.jenkins.plugins.kubernetes.PodTemplateUtils;
import org.csanchez.jenkins.plugins.kubernetes.pod.decorator.PodDecorator;
import org.csanchez.jenkins.plugins.kubernetes.pod.decorator.PodDecoratorException;
import org.hamcrest.MatcherAssert;
import org.htmlunit.html.DomNodeUtil;
import org.htmlunit.html.HtmlElement;
Expand All @@ -90,6 +93,7 @@
import org.jenkinsci.plugins.workflow.job.WorkflowRun;
import org.jenkinsci.plugins.workflow.steps.durable_task.DurableTaskStep;
import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep;
import org.jetbrains.annotations.NotNull;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
Expand All @@ -103,6 +107,7 @@
import org.jvnet.hudson.test.JenkinsRuleNonLocalhost;
import org.jvnet.hudson.test.LoggerRule;
import org.jvnet.hudson.test.MockAuthorizationStrategy;
import org.jvnet.hudson.test.TestExtension;

public class KubernetesPipelineTest extends AbstractKubernetesPipelineTest {

Expand Down Expand Up @@ -928,4 +933,20 @@ public void handleEviction() throws Exception {
SemaphoreStep.success("pod/2", null);
r.assertBuildStatusSuccess(r.waitForCompletion(b));
}

@Test
public void decoratorFailure() throws Exception {
r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
r.assertLogContains("I always fail", b);
assertThat("Node should have been removed", r.jenkins.getNodes(), empty());
}

@TestExtension("decoratorFailure")
public static class DecoratorImpl implements PodDecorator {
@NotNull
@Override
public Pod decorate(@NotNull KubernetesCloud kubernetesCloud, @NotNull Pod pod) {
throw new PodDecoratorException("I always fail");
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
podTemplate {
node(POD_LABEL) {
sh 'true'
}
}

0 comments on commit b211060

Please sign in to comment.