diff --git a/calrissian/job.py b/calrissian/job.py index f18b0e3..1f708d9 100644 --- a/calrissian/job.py +++ b/calrissian/job.py @@ -4,6 +4,7 @@ from cwltool.errors import WorkflowException, UnsupportedRequirement from calrissian.k8s import KubernetesClient, CompletionResult from calrissian.report import Reporter, TimedResourceReport +from cwltool.builder import Builder import logging import os import yaml @@ -188,7 +189,7 @@ def add_emptydir_volume_binding(self, name, target): class KubernetesPodBuilder(object): - def __init__(self, name, container_image, environment, volume_mounts, volumes, command_line, stdout, stderr, stdin, resources, labels, nodeselectors, security_context, serviceaccount): + def __init__(self, name, container_image, environment, volume_mounts, volumes, command_line, stdout, stderr, stdin, resources, labels, nodeselectors, security_context, serviceaccount, requirements=None, hints=None): self.name = name self.container_image = container_image self.environment = environment @@ -203,6 +204,8 @@ def __init__(self, name, container_image, environment, volume_mounts, volumes, c self.nodeselectors = nodeselectors self.security_context = security_context self.serviceaccount = serviceaccount + self.requirements = {} if requirements is None else requirements + self.hints = [] if hints is None else hints def pod_name(self): tag = random_tag() @@ -297,7 +300,7 @@ def resource_value(kind, cwl_value): return None def container_resources(self): - log.debug('Building resources spec from {}'.format(self.resources)) + log.debug(f'Building resources spec from {self.resources}') container_resources = {} for cwl_field, cwl_value in self.resources.items(): resource_bound = 'requests' @@ -307,6 +310,19 @@ def container_resources(self): if not container_resources.get(resource_bound): container_resources[resource_bound] = {} container_resources[resource_bound][resource_type] = resource_value + + # Add CUDA requirements from CWL + for requirement in self.hints: + if requirement["class"] in ['cwltool:CUDARequirement', 'http://commonwl.org/cwltool#CUDARequirement']: + log.debug('Adding CUDARequirement resources spec') + resource_bound = 'requests' + container_resources[resource_bound]['nvidia.com/gpu'] = str(requirement["cudaDeviceCountMin"]) + if "limits" in container_resources: + resource_bound = 'limits' + container_resources[resource_bound]['nvidia.com/gpu'] = str(requirement["cudaDeviceCountMax"]) + else: + container_resources['limits'] = {'nvidia.com/gpu': str(requirement["cudaDeviceCountMax"])} + return container_resources def pod_labels(self): @@ -553,7 +569,9 @@ def create_kubernetes_runtime(self, runtimeContext): self.get_pod_labels(runtimeContext), self.get_pod_nodeselectors(runtimeContext), self.get_security_context(runtimeContext), - self.get_pod_serviceaccount(runtimeContext) + self.get_pod_serviceaccount(runtimeContext), + self.builder.requirements, + self.builder.hints ) built = k8s_builder.build() log.debug('{}\n{}{}\n'.format('-' * 80, yaml.dump(built), '-' * 80)) @@ -692,4 +710,4 @@ def append_volume(self, runtime, source, target, writable=False): This is called by the base class for file literals after they've been created. We already have a similar function, so we just call that. """ - self._add_volume_binding(source, target, writable) + self._add_volume_binding(source, target, writable) \ No newline at end of file diff --git a/tests/test_job.py b/tests/test_job.py index 488e51e..9e6b9ae 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -8,7 +8,7 @@ from calrissian.context import CalrissianRuntimeContext from calrissian.k8s import CompletionResult import threading - +from collections import OrderedDict class SafeNameTestCase(TestCase): @@ -343,6 +343,37 @@ def test_container_resources(self): } } self.assertEqual(expected, resources) + + def test_gpu_hints(self): + self.pod_builder.resources = {'cores': 2, 'ram': 256} + self.pod_builder.hints = [OrderedDict([("class", "cwltool:CUDARequirement"), ("cudaVersionMin", '10.0'), ("cudaComputeCapability", '3.0'), ("cudaDeviceCountMin", 1), ("cudaDeviceCountMax", 1)])] + + resources = self.pod_builder.container_resources() + expected = { + 'requests': { + 'cpu': '2', + 'memory': '256Mi', + 'nvidia.com/gpu': '1' + }, + "limits": { + 'nvidia.com/gpu': '1' + } + } + self.assertEqual(expected, resources) + self.pod_builder.hints = [OrderedDict([("class", "cwltool:CUDARequirement"), ("cudaVersionMin", '10.0'), ("cudaComputeCapability", '3.0'), ("cudaDeviceCountMin", 2), ("cudaDeviceCountMax", 4)])] + + resources = self.pod_builder.container_resources() + expected = { + 'requests': { + 'cpu': '2', + 'memory': '256Mi', + 'nvidia.com/gpu': '2' + }, + "limits": { + 'nvidia.com/gpu': '4' + } + } + self.assertEqual(expected, resources) def test_string_labels(self): self.pod_builder.labels = {'key1': 123} @@ -637,7 +668,9 @@ def realpath(path): mock_read_yaml.return_value, mock_read_yaml.return_value, job.get_security_context(mock_runtime_context), - None + None, + job.builder.requirements, + job.builder.hints, )) # calls builder.build # returns that