Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[JENKINS-74992] Print relevant pod status changes in build logs #1627

Merged
merged 14 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.commons.lang.StringUtils.isEmpty;
import static org.csanchez.jenkins.plugins.kubernetes.PodTemplateUtils.sanitizeLabel;

import com.cloudbees.plugins.credentials.CredentialsMatchers;
import com.cloudbees.plugins.credentials.common.StandardCredentials;
Expand Down Expand Up @@ -29,9 +30,11 @@
import hudson.util.FormValidation;
import hudson.util.ListBoxModel;
import hudson.util.XStream2;
import io.fabric8.kubernetes.api.model.Pod;
import io.fabric8.kubernetes.client.KubernetesClient;
import io.fabric8.kubernetes.client.KubernetesClientException;
import io.fabric8.kubernetes.client.VersionInfo;
import io.fabric8.kubernetes.client.informers.SharedIndexInformer;
import java.io.IOException;
import java.io.StringReader;
import java.net.ConnectException;
Expand All @@ -49,10 +52,13 @@
import java.util.Base64;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.servlet.ServletException;
Expand All @@ -69,6 +75,7 @@
import org.csanchez.jenkins.plugins.kubernetes.pipeline.PodTemplateMap;
import org.csanchez.jenkins.plugins.kubernetes.pod.retention.Default;
import org.csanchez.jenkins.plugins.kubernetes.pod.retention.PodRetention;
import org.csanchez.jenkins.plugins.kubernetes.watch.PodStatusEventHandler;
import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuth;
import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuthException;
import org.jenkinsci.plugins.plaincredentials.impl.StringCredentialsImpl;
Expand Down Expand Up @@ -161,6 +168,12 @@ public class KubernetesCloud extends Cloud implements PodTemplateGroup {
@CheckForNull
private GarbageCollection garbageCollection;

/**
* namespace -> informer
* Used to watch pod events per namespace.
*/
private transient Map<String, SharedIndexInformer<Pod>> informers = new ConcurrentHashMap<>();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to be careful on what would happen to these informers if/when a KubernetesCloud gets removed.

Copy link
Member Author

@amuniz amuniz Dec 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAICT the informers map would be unreachable and it would be garbage collected, so the DefaultSharedIndexInformer would be collected too and with it the informerExecutor (each instance has an executor), so the resync would stop happening (I've checked it actually stops reporting events).


@DataBoundConstructor
public KubernetesCloud(String name) {
super(name);
Expand Down Expand Up @@ -1293,6 +1306,9 @@ private Object readResolve() {
if (containerCap != null && containerCap == 0) {
containerCap = null;
}
if (informers == null) {
informers = new ConcurrentHashMap<>();
}
return this;
}

Expand All @@ -1304,6 +1320,34 @@ public Cloud reconfigure(@NonNull StaplerRequest req, JSONObject form) throws De
return newInstance;
}

/**
 * Registers, at most once per namespace, an informer that watches pod events so that relevant pod
 * status changes can be printed in build logs.
 *
 * <p>The informer is keyed by the namespace of the given agent and is filtered by this cloud's pod
 * labels plus, when a Jenkins URL is available, the controller label. If the Kubernetes client
 * cannot be created, a warning is logged and no mapping is stored (the mapping function returns
 * {@code null}), so registration is attempted again on the next call for that namespace.
 *
 * @param node the agent whose namespace should be watched; when its namespace is {@code null}
 *     nothing is registered
 */
public void registerPodInformer(KubernetesSlave node) {
    String podNamespace = node.getNamespace();
    if (podNamespace == null) {
        // ConcurrentHashMap rejects null keys; bail out instead of failing the caller with an NPE.
        LOGGER.log(
                Level.WARNING,
                "No namespace set on agent [{0}]. Pod events will not be available in build logs.",
                node.getNodeName());
        return;
    }
    informers.computeIfAbsent(podNamespace, ns -> {
        KubernetesClient client;
        try {
            client = connect();
        } catch (KubernetesAuthException | IOException e) {
            LOGGER.log(
                    Level.WARNING,
                    "Cannot connect to K8s cloud. Pod events will not be available in build logs.",
                    e);
            // Returning null records no mapping, so a later launch can retry.
            return null;
        }
        Map<String, String> labelsFilter = new HashMap<>(getPodLabelsMap());
        String jenkinsUrlLabel = sanitizeLabel(getJenkinsUrlOrNull());
        if (jenkinsUrlLabel != null) {
            labelsFilter.put(PodTemplateBuilder.LABEL_KUBERNETES_CONTROLLER, jenkinsUrlLabel);
        }
        SharedIndexInformer<Pod> inform = client.pods()
                .inNamespace(ns)
                .withLabels(labelsFilter)
                .inform(new PodStatusEventHandler(), TimeUnit.SECONDS.toMillis(30));
        // Report the namespace actually being watched (the map key), which may differ from the
        // cloud-level default namespace.
        LOGGER.info(String.format(
                "Registered informer to watch pod events on namespace [%s], with labels [%s] on cloud [%s]",
                ns, labelsFilter, name));
        return inform;
    });
}

@Extension
public static class PodTemplateSourceImpl extends PodTemplateSource {
@NonNull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) {
.orElse(null);
node.setNamespace(namespace);

// register a namespace informer (if not registered yet) to show relevant pod events in build logs
cloud.registerPodInformer(node);

// if the controller was interrupted after creating the pod but before it connected back, then
// the pod might already exist and the creating logic must be skipped.
Pod existingPod =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package org.csanchez.jenkins.plugins.kubernetes.watch;

import hudson.model.Node;
import hudson.model.TaskListener;
import hudson.slaves.SlaveComputer;
import io.fabric8.kubernetes.api.model.ContainerState;
import io.fabric8.kubernetes.api.model.ContainerStatus;
import io.fabric8.kubernetes.api.model.Pod;
import io.fabric8.kubernetes.api.model.PodCondition;
import io.fabric8.kubernetes.client.informers.ResourceEventHandler;
import java.util.Optional;
import java.util.logging.Logger;
import jenkins.model.Jenkins;
import org.csanchez.jenkins.plugins.kubernetes.KubernetesSlave;

/**
 * Process pod events and print relevant information in build logs.
 * Registered as an informer in {@link org.csanchez.jenkins.plugins.kubernetes.KubernetesLauncher#launch(SlaveComputer, TaskListener)}.
 */
public class PodStatusEventHandler implements ResourceEventHandler<Pod> {

    private static final Logger LOGGER = Logger.getLogger(PodStatusEventHandler.class.getName());

    /** Placeholder printed when a status carries no message. */
    private static final String NO_MESSAGE = "No message";

    /**
     * Prints the non-running container states and the pod conditions that carry a reason to the run
     * listener of the Kubernetes agent whose node name equals the pod name.
     *
     * @param unused previous state of the pod (ignored)
     * @param pod current state of the pod
     */
    @Override
    public void onUpdate(Pod unused, Pod pod) {
        final String podName = pod.getMetadata().getName();
        // Only Kubernetes agents are considered, so a different kind of node that happens to share
        // the pod name cannot trigger a ClassCastException below.
        Optional<Node> found = Jenkins.get().getNodes().stream()
                .filter(n -> n instanceof KubernetesSlave && n.getNodeName().equals(podName))
                .findFirst();
        if (!found.isPresent()) {
            LOGGER.fine(() -> "Event received for non-existent node: [" + podName + "]");
            return;
        }
        if (pod.getStatus() == null) {
            // nothing to report yet
            return;
        }
        final StringBuilder sb = new StringBuilder();
        pod.getStatus().getContainerStatuses().forEach(s -> sb.append(formatContainerStatus(s)));
        final String phase = pod.getStatus().getPhase();
        pod.getStatus().getConditions().forEach(c -> sb.append(formatPodStatus(c, phase, sb)));
        if (sb.length() > 0) {
            ((KubernetesSlave) found.get())
                    .getRunListener()
                    .getLogger()
                    .println("[PodInfo] " + pod.getMetadata().getNamespace() + "/" + podName + sb);
        }
    }

    /**
     * Formats a pod condition as an indented log line.
     *
     * @param c the pod condition
     * @param phase the current pod phase
     * @param sb text accumulated so far, used to skip lines that were already added
     * @return the formatted line, or an empty string when the condition has no reason or the same
     *     line is already present in {@code sb}
     */
    private String formatPodStatus(PodCondition c, String phase, StringBuilder sb) {
        if (c.getReason() == null) {
            // not interesting
            return "";
        }
        String message = c.getMessage();
        String formatted =
                String.format("%n\tPod [%s][%s] %s", phase, c.getReason(), message != null ? message : NO_MESSAGE);
        return sb.indexOf(formatted) == -1 ? formatted : "";
    }

    /**
     * Formats the state of a container as an indented log line.
     *
     * @param s the container status
     * @return the formatted line, or an empty string when the container is running or reports no
     *     state
     */
    private String formatContainerStatus(ContainerStatus s) {
        ContainerState state = s.getState();
        if (state == null || state.getRunning() != null) {
            // don't care about running (or not yet reported) containers
            return "";
        }
        StringBuilder sb = new StringBuilder();
        sb.append(String.format("%n\tContainer [%s]", s.getName()));
        if (state.getTerminated() != null) {
            String message = state.getTerminated().getMessage();
            sb.append(String.format(
                    " terminated [%s] %s",
                    state.getTerminated().getReason(), message != null ? message : NO_MESSAGE));
        }
        if (state.getWaiting() != null) {
            String message = state.getWaiting().getMessage();
            sb.append(String.format(
                    " waiting [%s] %s", state.getWaiting().getReason(), message != null ? message : NO_MESSAGE));
        }
        return sb.toString();
    }

    @Override
    public void onDelete(Pod pod, boolean deletedFinalStateUnknown) {
        // no-op
    }

    @Override
    public void onAdd(Pod pod) {
        // no-op
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package org.csanchez.jenkins.plugins.kubernetes.pipeline;

import static org.junit.Assert.assertNotNull;

import hudson.model.Result;
import org.junit.Test;

/**
 * Tests asserting that pod and container status messages show up in the build log.
 * NOTE(review): createJobThenScheduleRun(), r and b are not declared in this class; presumably
 * they are inherited from AbstractKubernetesPipelineTest - confirm there how the pipeline
 * script for each test is selected.
 */
public class PodProvisioningStatusLogsTest extends AbstractKubernetesPipelineTest {

/**
 * Waits for the "Unschedulable" pod message in the build log, then kills the build and waits
 * until there is no more activity.
 */
@Test
public void podStatusErrorLogs() throws Exception {
assertNotNull(createJobThenScheduleRun());
// pod not schedulable
// build never finishes, so just checking the message and killing
r.waitForMessage("Pod [Pending][Unschedulable] 0/1 nodes are available", b);
b.doKill();
r.waitUntilNoActivity();
}

/**
 * Expects the build to succeed and the log to contain the container "waiting" message and the
 * pod "ContainersNotReady" message.
 */
@Test
public void podStatusNoErrorLogs() throws Exception {
assertNotNull(createJobThenScheduleRun());
r.assertBuildStatusSuccess(r.waitForCompletion(b));
// regular logs when starting containers
r.assertLogContains("Container [jnlp] waiting [ContainerCreating]", b);
r.assertLogContains("Pod [Pending][ContainersNotReady] containers with unready status: [shell jnlp]", b);
}

/**
 * Expects the build to end as ABORTED and the log to contain the terminated-container message,
 * the underlying "executable file not found" error and the pod "ContainersNotReady" message.
 */
@Test
public void containerStatusErrorLogs() throws Exception {
assertNotNull(createJobThenScheduleRun());
r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
// error starting container
r.assertLogContains("Container [shell] terminated [StartError]", b);
r.assertLogContains("exec: \"oops\": executable file not found", b);
r.assertLogContains("Pod [Running][ContainersNotReady] containers with unready status: [shell]", b);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//noinspection GrPackage
pipeline {
agent {
kubernetes {
yaml '''
apiVersion: v1
kind: Pod
spec:
containers:
- name: shell
image: ubuntu
command:
- oops
args:
- infinity
'''
}
}
stages {
stage('Run') {
steps {
sh 'hostname'
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//noinspection GrPackage
pipeline {
agent {
kubernetes {
yaml '''
apiVersion: v1
kind: Pod
spec:
containers:
- name: shell
image: ubuntu
command:
- sleep
args:
- infinity
nodeSelector:
disktype: ssd
'''
}
}
stages {
stage('Run') {
steps {
sh 'hostname'
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//noinspection GrPackage
pipeline {
agent {
kubernetes {
yaml '''
apiVersion: v1
kind: Pod
spec:
containers:
- name: shell
image: ubuntu
command:
- sleep
args:
- infinity
'''
}
}
stages {
stage('Run') {
steps {
sh 'hostname'
}
}
}
}
Loading