Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[JENKINS-68126] Remove event streaming causing performance problems at scale #1192

Merged
merged 11 commits into from
Jun 8, 2022

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,13 @@
import io.fabric8.kubernetes.api.model.Pod;
import io.fabric8.kubernetes.client.KubernetesClient;
import io.fabric8.kubernetes.client.KubernetesClientException;
import io.fabric8.kubernetes.client.Watch;
import io.fabric8.kubernetes.client.KubernetesClientTimeoutException;
import io.fabric8.kubernetes.client.internal.readiness.Readiness;
import jenkins.metrics.api.Metrics;
import org.apache.commons.lang.StringUtils;
import org.csanchez.jenkins.plugins.kubernetes.pod.retention.Reaper;
import org.kohsuke.stapler.DataBoundConstructor;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
Expand All @@ -46,10 +52,6 @@
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import jenkins.metrics.api.Metrics;
import org.apache.commons.lang.StringUtils;
import org.csanchez.jenkins.plugins.kubernetes.pod.retention.Reaper;
import org.kohsuke.stapler.DataBoundConstructor;

import static java.util.logging.Level.FINE;
import static java.util.logging.Level.INFO;
Expand All @@ -62,9 +64,6 @@ public class KubernetesLauncher extends JNLPLauncher {
// Report progress every 30 seconds
private static final long REPORT_INTERVAL = TimeUnit.SECONDS.toMillis(30L);

@CheckForNull
private transient AllContainersRunningPodWatcher watcher;

private static final Logger LOGGER = Logger.getLogger(KubernetesLauncher.class.getName());

private boolean launched;
Expand Down Expand Up @@ -166,12 +165,12 @@ else if (httpCode == 409 && e.getMessage().contains("Operation cannot be fulfill
ObjectMeta podMetadata = pod.getMetadata();
template.getWorkspaceVolume().createVolume(client, podMetadata);
template.getVolumes().forEach(volume -> volume.createVolume(client, podMetadata));
watcher = new AllContainersRunningPodWatcher(client, pod);
try (Watch w1 = client.pods().inNamespace(namespace).withName(podName).watch(watcher);
Watch w2 = eventWatch(client, podName, namespace, runListener)) {
assert watcher != null; // assigned 3 lines above
watcher.await(template.getSlaveConnectTimeout(), TimeUnit.SECONDS);

Pod checkPod = client.pods().inNamespace(namespace).withName(podName).waitUntilReady(template.getSlaveConnectTimeout(), TimeUnit.SECONDS);
if (!Readiness.isPodReady(checkPod)) {
throw new KubernetesClientTimeoutException(pod, template.getSlaveConnectTimeout(), TimeUnit.SECONDS);
}
Vlatombe marked this conversation as resolved.
Show resolved Hide resolved

LOGGER.log(INFO, () -> "Pod is running: " + cloudName + " " + namespace + "/" + podName);

// We need the pod to be running and connected before returning
Expand Down Expand Up @@ -256,17 +255,7 @@ else if (httpCode == 409 && e.getMessage().contains("Operation cannot be fulfill
Metrics.metricRegistry().counter(MetricNames.PODS_LAUNCHED).inc();
} catch (Throwable ex) {
setProblem(ex);
if (ex instanceof AllContainersRunningPodWatcher.PodNotRunningException) {
Throwable[] suppressed = ex.getSuppressed();
if (suppressed.length > 0 && suppressed[0] instanceof ContainerLogs) {
runListener.getLogger().println("Unable to provision agent " + node.getNodeName() + " :");
runListener.getLogger().print(suppressed[0].getMessage());
}
LOGGER.log(Level.WARNING, String.format("Error in provisioning: %s; agent=%s, template=%s", ex.getMessage(), node, template));
LOGGER.log(Level.FINE, null, ex);
} else {
LOGGER.log(Level.WARNING, String.format("Error in provisioning; agent=%s, template=%s", node, template), ex);
}
LOGGER.log(Level.WARNING, String.format("Error in provisioning; agent=%s, template=%s", node, template), ex);
LOGGER.log(Level.FINER, "Removing Jenkins node: {0}", node.getNodeName());
try {
node.terminate();
Expand All @@ -277,15 +266,6 @@ else if (httpCode == 409 && e.getMessage().contains("Operation cannot be fulfill
}
}

/**
 * Starts a watch on the events of the given namespace whose
 * {@code involvedObject.name} field equals {@code podName}, delivering them to a
 * new {@link TaskListenerEventWatcher} created from {@code podName} and
 * {@code runListener}.
 *
 * <p>If the client throws a {@link KubernetesClientException} while setting up the
 * watch, the failure is logged at INFO level and a do-nothing {@link Watch} is
 * returned instead, so the caller always receives a non-null handle.
 *
 * @param client      Kubernetes client used to open the watch
 * @param podName     name matched against {@code involvedObject.name}
 * @param namespace   namespace in which events are watched
 * @param runListener listener handed to the {@link TaskListenerEventWatcher}
 * @return the opened watch, or a no-op watch when it could not be opened
 */
private Watch eventWatch(KubernetesClient client, String podName, String namespace, TaskListener runListener) {
    try {
        return client.v1()
                .events()
                .inNamespace(namespace)
                .withField("involvedObject.name", podName)
                .watch(new TaskListenerEventWatcher(podName, runListener));
    } catch (KubernetesClientException watchFailure) {
        // Best effort only: being unable to watch events is reported, not propagated.
        LOGGER.log(Level.INFO, watchFailure, () -> "Cannot watch events on " + namespace + "/" + podName);
        return () -> {};
    }
}

private void checkTerminatedContainers(List<ContainerStatus> terminatedContainers, String podId, String namespace,
KubernetesSlave slave, KubernetesClient client) {
if (!terminatedContainers.isEmpty()) {
Expand Down Expand Up @@ -330,8 +310,4 @@ public void setProblem(@CheckForNull Throwable problem) {
this.problem = problem;
}

/**
 * Exposes the pod watcher currently held by this launcher.
 *
 * @return the value of the {@code watcher} field; that field is declared
 *         {@code @CheckForNull} and {@code transient}, so callers should be
 *         prepared for {@code null}
 */
public AllContainersRunningPodWatcher getWatcher() {
    return this.watcher;
}

}
Loading