From 4256c901089c7a5af2a517ab37b2cb8a3392ea91 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Fri, 9 Nov 2018 17:18:51 -0800 Subject: [PATCH 01/11] add requirement to component image build --- sdk/python/kfp/compiler/__init__.py | 2 +- sdk/python/kfp/compiler/_component_builder.py | 110 ++++++++++++++++-- 2 files changed, 101 insertions(+), 11 deletions(-) diff --git a/sdk/python/kfp/compiler/__init__.py b/sdk/python/kfp/compiler/__init__.py index 5c3c567c2c2..6057febc9fe 100644 --- a/sdk/python/kfp/compiler/__init__.py +++ b/sdk/python/kfp/compiler/__init__.py @@ -14,4 +14,4 @@ from .compiler import Compiler -from ._component_builder import build_python_component, build_docker_image \ No newline at end of file +from ._component_builder import build_python_component, build_docker_image, DependencyVersion \ No newline at end of file diff --git a/sdk/python/kfp/compiler/_component_builder.py b/sdk/python/kfp/compiler/_component_builder.py index 830563206d8..448b20819a9 100644 --- a/sdk/python/kfp/compiler/_component_builder.py +++ b/sdk/python/kfp/compiler/_component_builder.py @@ -58,6 +58,77 @@ def download_gcs_blob(local_path, gcs_path): blob = bucket.blob(gcs_blob) blob.download_to_filename(local_path) +class DependencyVersion(object): + """ DependencyVersion specifies the versions """ + def __init__(self, version=None, min_version=None, max_version=None): + """ if version is specified, no need for min_version or max_version; + if both are specified, version is adopted """ + if version is not None: + self._min_version = version + self._max_version = version + else: + self._min_version = min_version + self._max_version = max_version + + @property + def min_version(self): + return self._min_version + + @min_version.setter + def min_version(self, min_version): + self._min_version = min_version + + def has_min_version(self): + return self._min_version != None + + @property + def max_version(self): + return self._max_version + + @max_version.setter + def max_version(self, max_version): + self._max_version = max_version + + def has_max_version(self): + return self._max_version != None + + def has_versions(self): + return (not self.has_min_version()) and (not self.has_max_version()) + + +class DependencyHelper(object): + """ DependencyHelper manages software dependency information """ + def __init__(self): + self._PYTHON_PACKAGE = 'PYTHON_PACKAGE' + self._dependency = {self.PYTHON_PACKAGE:{}} + + @property + def python_pacakge(self): + return self._dependency[self.self._PYTHON_PACKAGE] + + def add_python_package(self, name, version=DependencyVersion(), override=True): + """ add_single_python_package adds a dependency for the python package + + Args: + name: package name + version: it could be a specific version(1.10.0), or a range(>=1.0,<=2.0) + if not specified, the default is resolved automatically by the pip system. + override: whether to override the version if already existing in the dependency. + """ + if name in self.python_pacakge and not override: + return + self.python_pacakge[name] = version + + def generate_pip_requirements(self, target_file): + """ write the python packages to a requirement file """ + with open(target_file, 'w') as f: + for name, version in self.python_pacakge.items(): + version_str = '' + if version.has_min_version(): + version += ' >= ' + version.min_version + if version.has_max_version(): + version += ', <= ' + version.max_version + f.write(name + version) class DockerfileHelper(object): """ Dockerfile Helper generates a tarball with dockerfile, ready for docker build @@ -67,13 +138,22 @@ class DockerfileHelper(object): def __init__(self, arc_dockerfile_name, gcs_path): self._arc_dockerfile_name = arc_dockerfile_name self._gcs_path = gcs_path + self._ARC_REQUIREMENT_FILE = 'requirements.txt' + + def _generate_pip_requirement(self, dependency, requirement_filepath): + dependency_helper = DependencyHelper() + for name, version in dependency.items(): + dependency_helper.add_python_package(name, version) + dependency_helper.generate_pip_requirements(requirement_filepath) - def _generate_dockerfile_with_py(self, target_file, base_image, python_filepath): + def _generate_dockerfile_with_py(self, target_file, base_image, python_filepath, has_requirement_file): """ _generate_docker_file generates a simple dockerfile with the python path """ with open(target_file, 'w') as f: f.write('FROM ' + base_image + '\n') f.write('RUN apt-get update -y && apt-get install --no-install-recommends -y -q python3 python3-pip python3-setuptools\n') f.write('RUN pip3 install fire\n') + if has_requirement_file: + f.write('RUN pip3 install -r ' + self._ARC_REQUIREMENT_FILE) f.write('ADD ' + python_filepath + " /ml/" + '\n') f.write('ENTRYPOINT ["python3", "/ml/' + python_filepath + '"]') @@ -86,14 +166,22 @@ def _wrap_files_in_tarball(self, tarball_path, files={}): for key, value in files.items(): tarball.add(value, arcname=key) - def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image): + def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image, dependency): """ prepare_docker_tarball is the API to generate dockerfile and prepare the tarball with python scripts """ with tempfile.TemporaryDirectory() as local_build_dir: + has_requirement_file = False + local_requirement_path = os.path.join(local_build_dir, self._ARC_REQUIREMENT_FILE) + if dependency is not None and len(dependency) != 0: + self._generate_pip_requirement(dependency, local_requirement_path) + has_requirement_file = True local_dockerfile_path = os.path.join(local_build_dir, self._arc_dockerfile_name) - self._generate_dockerfile_with_py(local_dockerfile_path, base_image, arc_python_filename) + self._generate_dockerfile_with_py(local_dockerfile_path, base_image, arc_python_filename, has_requirement_file) local_tarball_path = os.path.join(local_build_dir, 'docker.tmp.tar.gz') - self._wrap_files_in_tarball(local_tarball_path, {self._arc_dockerfile_name:local_dockerfile_path, - arc_python_filename:python_filepath}) + file_lists = {self._arc_dockerfile_name:local_dockerfile_path, + arc_python_filename:python_filepath} + if has_requirement_file: + file_lists[self._ARC_REQUIREMENT_FILE] = local_requirement_path + self._wrap_files_in_tarball(local_tarball_path, file_lists) GCSHelper.upload_gcs_file(local_tarball_path, self._gcs_path) def prepare_docker_tarball(self, dockerfile_path): @@ -246,7 +334,7 @@ def _generate_entrypoint(self, component_func): complete_component_code = dedecorated_component_src + '\n' + wrapper_code + '\n' + codegen.end() return complete_component_code - def build_image_from_func(self, component_func, namespace, base_image, timeout): + def build_image_from_func(self, component_func, namespace, base_image, timeout, dependency): """ build_image builds an image for the given python function""" # Generate entrypoint and serialization python codes @@ -262,7 +350,7 @@ def build_image_from_func(self, component_func, namespace, base_image, timeout): docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name, gcs_path=self._gcs_path) docker_helper.prepare_docker_tarball_with_py(python_filepath=local_python_filepath, arc_python_filename=self._arc_python_filepath, - base_image=base_image) + base_image=base_image, dependency=dependency) kaniko_spec = self._generate_kaniko_spec(namespace=namespace, arc_dockerfile_name=self._arc_dockerfile_name, @@ -322,16 +410,18 @@ def _generate_pythonop(component_func, target_image): component_artifact['implementation']['dockerContainer']['arguments'].append({'value': input}) return _create_task_factory_from_component_dict(component_artifact) -def build_python_component(component_func, staging_gcs_path, target_image, build_image=True, timeout=600, namespace='kubeflow'): +def build_python_component(component_func, staging_gcs_path, target_image, build_image=True, timeout=600, namespace='kubeflow', dependency={}): """ build_component automatically builds a container image for the component_func based on the base_image and pushes to the target_image. Args: component_func (python function): The python function to build components upon staging_gcs_path (str): GCS blob that can store temporary build files + target_image (str): target image path + build_image (bool): whether to build the image or not. Default is True. timeout (int): the timeout for the image build(in secs), default is 600 seconds namespace (str): the namespace within which to run the kubernetes kaniko job, default is "kubeflow" - build_image (bool): whether to build the image or not. Default is True. + dependency (dict): a dictionary with key as the package name and value as the DependencyVersion, default is empty Raises: ValueError: The function is not decorated with python_component decorator @@ -351,7 +441,7 @@ def build_python_component(component_func, staging_gcs_path, target_image, build if build_image: builder = ImageBuilder(gcs_base=staging_gcs_path, target_image=target_image) builder.build_image_from_func(component_func, namespace=namespace, - base_image=component_meta['base_image'], timeout=timeout) + base_image=component_meta['base_image'], timeout=timeout, dependency) logging.info('Build component complete.') return _generate_pythonop(component_func, target_image) From 090e17daf79a3330b5f6d4cf75a8d90682648c4f Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Fri, 9 Nov 2018 18:11:17 -0800 Subject: [PATCH 02/11] add unit test for DependencyVersion --- .../tests/compiler/component_builder_test.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/sdk/python/tests/compiler/component_builder_test.py b/sdk/python/tests/compiler/component_builder_test.py index d9f1a78b80f..a1833c45a85 100644 --- a/sdk/python/tests/compiler/component_builder_test.py +++ b/sdk/python/tests/compiler/component_builder_test.py @@ -16,6 +16,7 @@ from kfp.compiler._component_builder import DockerfileHelper from kfp.compiler._component_builder import CodeGenerator from kfp.compiler._component_builder import ImageBuilder +from kfp.compiler._component_builder import DependencyVersion import os import unittest @@ -49,6 +50,33 @@ def test_upload_gcs_path(self): os.remove(temp_file) os.remove(temp_downloaded_file) +class TestDependencyVersion(unittest.TestCase): + + def test_version(self): + """ test version overrides min_version and max_version """ + version = DependencyVersion(version='0.3.0', min_version='0.1.0', max_version='0.4.0') + self.assertTrue(version.min_version == '0.3.0') + self.assertTrue(version.max_version == '0.3.0') + self.assertTrue(version.has_versions()) + self.assertTrue(version.has_min_version()) + self.assertTrue(version.has_max_version()) + + def test_minmax_version(self): + """ test if min_version and max_version are configured when version is not given """ + version = DependencyVersion(min_version='0.1.0', max_version='0.4.0') + self.assertTrue(version.min_version == '0.1.0') + self.assertTrue(version.max_version == '0.4.0') + self.assertTrue(version.has_versions()) + self.assertTrue(version.has_min_version()) + self.assertTrue(version.has_max_version()) + + def test_no_version(self): + """ test the no version scenario """ + version = DependencyVersion() + self.assertFalse(version.has_min_version()) + self.assertFalse(version.has_max_version()) + self.assertFalse(version.has_versions()) + class TestDockerfileHelper(unittest.TestCase): def test_wrap_files_in_tarball(self): From d7253bca93f94e78074260d0781ff323711a02d2 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Mon, 12 Nov 2018 13:57:21 -0800 Subject: [PATCH 03/11] add unit tests, fix bugs --- sdk/python/kfp/compiler/_component_builder.py | 30 ++++---- .../tests/compiler/component_builder_test.py | 71 +++++++++++++++++-- sdk/python/tests/compiler/main.py | 4 +- 3 files changed, 85 insertions(+), 20 deletions(-) diff --git a/sdk/python/kfp/compiler/_component_builder.py b/sdk/python/kfp/compiler/_component_builder.py index 448b20819a9..a658894c577 100644 --- a/sdk/python/kfp/compiler/_component_builder.py +++ b/sdk/python/kfp/compiler/_component_builder.py @@ -19,6 +19,7 @@ import re import tempfile import logging +from collections import OrderedDict from google.cloud import storage from pathlib import PurePath, Path from kfp import dsl @@ -93,18 +94,18 @@ def has_max_version(self): return self._max_version != None def has_versions(self): - return (not self.has_min_version()) and (not self.has_max_version()) + return (self.has_min_version()) or (self.has_max_version()) class DependencyHelper(object): """ DependencyHelper manages software dependency information """ def __init__(self): self._PYTHON_PACKAGE = 'PYTHON_PACKAGE' - self._dependency = {self.PYTHON_PACKAGE:{}} + self._dependency = {self._PYTHON_PACKAGE:OrderedDict()} @property - def python_pacakge(self): - return self._dependency[self.self._PYTHON_PACKAGE] + def python_package(self): + return self._dependency[self._PYTHON_PACKAGE] def add_python_package(self, name, version=DependencyVersion(), override=True): """ add_single_python_package adds a dependency for the python package @@ -115,20 +116,21 @@ def add_python_package(self, name, version=DependencyVersion(), override=True): if not specified, the default is resolved automatically by the pip system. override: whether to override the version if already existing in the dependency. """ - if name in self.python_pacakge and not override: + if name in self.python_package and not override: return - self.python_pacakge[name] = version + self.python_package[name] = version def generate_pip_requirements(self, target_file): - """ write the python packages to a requirement file """ + """ write the python packages to a requirement file + the generated file follows the order of which the packages are added """ with open(target_file, 'w') as f: - for name, version in self.python_pacakge.items(): + for name, version in self.python_package.items(): version_str = '' if version.has_min_version(): - version += ' >= ' + version.min_version + version_str += ' >= ' + version.min_version if version.has_max_version(): - version += ', <= ' + version.max_version - f.write(name + version) + version_str += ', <= ' + version.max_version + f.write(name + version_str + '\n') class DockerfileHelper(object): """ Dockerfile Helper generates a tarball with dockerfile, ready for docker build @@ -153,7 +155,7 @@ def _generate_dockerfile_with_py(self, target_file, base_image, python_filepath, f.write('RUN apt-get update -y && apt-get install --no-install-recommends -y -q python3 python3-pip python3-setuptools\n') f.write('RUN pip3 install fire\n') if has_requirement_file: - f.write('RUN pip3 install -r ' + self._ARC_REQUIREMENT_FILE) + f.write('RUN pip3 install -r ' + self._ARC_REQUIREMENT_FILE + '\n') f.write('ADD ' + python_filepath + " /ml/" + '\n') f.write('ENTRYPOINT ["python3", "/ml/' + python_filepath + '"]') @@ -166,7 +168,7 @@ def _wrap_files_in_tarball(self, tarball_path, files={}): for key, value in files.items(): tarball.add(value, arcname=key) - def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image, dependency): + def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image, dependency=None): """ prepare_docker_tarball is the API to generate dockerfile and prepare the tarball with python scripts """ with tempfile.TemporaryDirectory() as local_build_dir: has_requirement_file = False @@ -441,7 +443,7 @@ def build_python_component(component_func, staging_gcs_path, target_image, build if build_image: builder = ImageBuilder(gcs_base=staging_gcs_path, target_image=target_image) builder.build_image_from_func(component_func, namespace=namespace, - base_image=component_meta['base_image'], timeout=timeout, dependency) + base_image=component_meta['base_image'], timeout=timeout, dependency=dependency) logging.info('Build component complete.') return _generate_pythonop(component_func, target_image) diff --git a/sdk/python/tests/compiler/component_builder_test.py b/sdk/python/tests/compiler/component_builder_test.py index a1833c45a85..3e7e82efdb5 100644 --- a/sdk/python/tests/compiler/component_builder_test.py +++ b/sdk/python/tests/compiler/component_builder_test.py @@ -17,6 +17,7 @@ from kfp.compiler._component_builder import CodeGenerator from kfp.compiler._component_builder import ImageBuilder from kfp.compiler._component_builder import DependencyVersion +from kfp.compiler._component_builder import DependencyHelper import os import unittest @@ -24,6 +25,7 @@ import tarfile from pathlib import Path import inspect +from collections import OrderedDict GCS_BASE = 'gs://ngao-mlpipeline-testing/' @@ -77,6 +79,29 @@ def test_no_version(self): self.assertFalse(version.has_max_version()) self.assertFalse(version.has_versions()) +class TestDependencyHelper(unittest.TestCase): + + def test_generate_requirement(self): + """ Test generating requirement file """ + + # prepare + test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata') + temp_file = os.path.join(test_data_dir, 'test_requirements.tmp') + + dependency_helper = DependencyHelper() + dependency_helper.add_python_package(name='tensorflow', version=DependencyVersion(min_version='0.10.0', max_version='0.11.0')) + dependency_helper.add_python_package(name='kubernetes', version=DependencyVersion(min_version='0.6.0')) + dependency_helper.generate_pip_requirements(temp_file) + + golden_requirement_payload = '''\ +tensorflow >= 0.10.0, <= 0.11.0 +kubernetes >= 0.6.0 +''' + with open(temp_file, 'r') as f: + target_requirement_payload = f.read() + self.assertEqual(target_requirement_payload, golden_requirement_payload) + os.remove(temp_file) + class TestDockerfileHelper(unittest.TestCase): def test_wrap_files_in_tarball(self): @@ -94,7 +119,7 @@ def test_wrap_files_in_tarball(self): # check docker_helper = DockerfileHelper(arc_dockerfile_name='', gcs_path='') - self.assertTrue(docker_helper._wrap_files_in_tarball(temp_tarball, {'dockerfile':temp_file_one, 'main.py':temp_file_two})) + docker_helper._wrap_files_in_tarball(temp_tarball, {'dockerfile':temp_file_one, 'main.py':temp_file_two}) self.assertTrue(os.path.exists(temp_tarball)) temp_tarball_handler = tarfile.open(temp_tarball) temp_files = temp_tarball_handler.getmembers() @@ -113,19 +138,30 @@ def test_generate_dockerfile(self): # prepare test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata') target_dockerfile = os.path.join(test_data_dir, 'component.temp.dockerfile') - golden_dockerfile_payload = '''\ + golden_dockerfile_payload_one = '''\ FROM gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0 RUN apt-get update -y && apt-get install --no-install-recommends -y -q python3 python3-pip python3-setuptools RUN pip3 install fire ADD main.py /ml/ +ENTRYPOINT ["python3", "/ml/main.py"]''' + golden_dockerfile_payload_two = '''\ +FROM gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0 +RUN apt-get update -y && apt-get install --no-install-recommends -y -q python3 python3-pip python3-setuptools +RUN pip3 install fire +RUN pip3 install -r requirements.txt +ADD main.py /ml/ ENTRYPOINT ["python3", "/ml/main.py"]''' # check docker_helper = DockerfileHelper(arc_dockerfile_name=target_dockerfile, gcs_path='') - docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0', python_filepath='main.py') + docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0', python_filepath='main.py', has_requirement_file=False) + with open(target_dockerfile, 'r') as f: + target_dockerfile_payload = f.read() + self.assertEqual(target_dockerfile_payload, golden_dockerfile_payload_one) + docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0', python_filepath='main.py', has_requirement_file=True) with open(target_dockerfile, 'r') as f: target_dockerfile_payload = f.read() - self.assertEqual(target_dockerfile_payload, golden_dockerfile_payload) + self.assertEqual(target_dockerfile_payload, golden_dockerfile_payload_two) # clean up os.remove(target_dockerfile) @@ -153,6 +189,33 @@ def test_prepare_docker_with_py(self): os.remove(downloaded_tarball) GCSHelper.remove_gcs_blob(gcs_tar_path) + def test_prepare_docker_with_py_and_dependency(self): + """ Test the whole prepare docker from python function and dependencies """ + + # prepare + test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata') + python_filepath = os.path.join(test_data_dir, 'basic.py') + downloaded_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz') + gcs_tar_path = os.path.join(GCS_BASE, 'test_docker.tar.gz') + + # check + docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile', gcs_path=gcs_tar_path) + dependencies = { + 'tensorflow': DependencyVersion(min_version='0.10.0', max_version='0.11.0'), + 'kubernetes': DependencyVersion(min_version='0.6.0'), + } + docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0', dependency=dependencies) + GCSHelper.download_gcs_blob(downloaded_tarball, gcs_tar_path) + temp_tarball_handler = tarfile.open(downloaded_tarball) + temp_files = temp_tarball_handler.getmembers() + self.assertTrue(len(temp_files) == 3) + for temp_file in temp_files: + self.assertTrue(temp_file.name in ['dockerfile', 'main.py', 'requirements.txt']) + + # clean up + os.remove(downloaded_tarball) + GCSHelper.remove_gcs_blob(gcs_tar_path) + def test_prepare_docker_tarball(self): """ Test the whole prepare docker tarball """ diff --git a/sdk/python/tests/compiler/main.py b/sdk/python/tests/compiler/main.py index 598f75551cf..85472a9c560 100644 --- a/sdk/python/tests/compiler/main.py +++ b/sdk/python/tests/compiler/main.py @@ -22,9 +22,9 @@ if __name__ == '__main__': suite = unittest.TestSuite() - suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(compiler_tests)) + #suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(compiler_tests)) #TODO: enable the test once a gcs mock class is created. - #suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test)) + suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test)) runner = unittest.TextTestRunner() if not runner.run(suite).wasSuccessful(): sys.exit(1) From 06a37353476e0b876dfed89604149d345b6b6c54 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Mon, 12 Nov 2018 14:01:04 -0800 Subject: [PATCH 04/11] revert some unit test change --- sdk/python/tests/compiler/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/tests/compiler/main.py b/sdk/python/tests/compiler/main.py index 85472a9c560..598f75551cf 100644 --- a/sdk/python/tests/compiler/main.py +++ b/sdk/python/tests/compiler/main.py @@ -22,9 +22,9 @@ if __name__ == '__main__': suite = unittest.TestSuite() - #suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(compiler_tests)) + suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(compiler_tests)) #TODO: enable the test once a gcs mock class is created. - suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test)) + #suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test)) runner = unittest.TextTestRunner() if not runner.run(suite).wasSuccessful(): sys.exit(1) From 000ca3ed9906f1706227462236db5a4ab37eaedc Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Tue, 13 Nov 2018 10:55:39 -0800 Subject: [PATCH 05/11] include the package name inside the VersionedDependency, enable the unit test for the component builder --- sdk/python/kfp/compiler/__init__.py | 2 +- sdk/python/kfp/compiler/_component_builder.py | 27 +++++++++------- .../tests/compiler/component_builder_test.py | 32 +++++++++++++------ sdk/python/tests/compiler/main.py | 3 +- 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/sdk/python/kfp/compiler/__init__.py b/sdk/python/kfp/compiler/__init__.py index 6057febc9fe..f397748ebaa 100644 --- a/sdk/python/kfp/compiler/__init__.py +++ b/sdk/python/kfp/compiler/__init__.py @@ -14,4 +14,4 @@ from .compiler import Compiler -from ._component_builder import build_python_component, build_docker_image, DependencyVersion \ No newline at end of file +from ._component_builder import build_python_component, build_docker_image, VersionedDependency \ No newline at end of file diff --git a/sdk/python/kfp/compiler/_component_builder.py b/sdk/python/kfp/compiler/_component_builder.py index a658894c577..c513c91e091 100644 --- a/sdk/python/kfp/compiler/_component_builder.py +++ b/sdk/python/kfp/compiler/_component_builder.py @@ -59,11 +59,12 @@ def download_gcs_blob(local_path, gcs_path): blob = bucket.blob(gcs_blob) blob.download_to_filename(local_path) -class DependencyVersion(object): +class VersionedDependency(object): """ DependencyVersion specifies the versions """ - def __init__(self, version=None, min_version=None, max_version=None): + def __init__(self, name, version=None, min_version=None, max_version=None): """ if version is specified, no need for min_version or max_version; if both are specified, version is adopted """ + self._name = name if version is not None: self._min_version = version self._max_version = version @@ -71,6 +72,10 @@ def __init__(self, version=None, min_version=None, max_version=None): self._min_version = min_version self._max_version = max_version + @property + def name(self): + return self._name + @property def min_version(self): return self._min_version @@ -104,10 +109,10 @@ def __init__(self): self._dependency = {self._PYTHON_PACKAGE:OrderedDict()} @property - def python_package(self): + def python_packages(self): return self._dependency[self._PYTHON_PACKAGE] - def add_python_package(self, name, version=DependencyVersion(), override=True): + def add_python_package(self, version=VersionedDependency(name='default'), override=True): """ add_single_python_package adds a dependency for the python package Args: @@ -116,15 +121,15 @@ def add_python_package(self, name, version=DependencyVersion(), override=True): if not specified, the default is resolved automatically by the pip system. override: whether to override the version if already existing in the dependency. """ - if name in self.python_package and not override: + if version.name in self.python_packages and not override: return - self.python_package[name] = version + self.python_packages[version.name] = version def generate_pip_requirements(self, target_file): """ write the python packages to a requirement file the generated file follows the order of which the packages are added """ with open(target_file, 'w') as f: - for name, version in self.python_package.items(): + for name, version in self.python_packages.items(): version_str = '' if version.has_min_version(): version_str += ' >= ' + version.min_version @@ -144,8 +149,8 @@ def __init__(self, arc_dockerfile_name, gcs_path): def _generate_pip_requirement(self, dependency, requirement_filepath): dependency_helper = DependencyHelper() - for name, version in dependency.items(): - dependency_helper.add_python_package(name, version) + for version in dependency: + dependency_helper.add_python_package(version) dependency_helper.generate_pip_requirements(requirement_filepath) def _generate_dockerfile_with_py(self, target_file, base_image, python_filepath, has_requirement_file): @@ -412,7 +417,7 @@ def _generate_pythonop(component_func, target_image): component_artifact['implementation']['dockerContainer']['arguments'].append({'value': input}) return _create_task_factory_from_component_dict(component_artifact) -def build_python_component(component_func, staging_gcs_path, target_image, build_image=True, timeout=600, namespace='kubeflow', dependency={}): +def build_python_component(component_func, staging_gcs_path, target_image, build_image=True, timeout=600, namespace='kubeflow', dependency=[]): """ build_component automatically builds a container image for the component_func based on the base_image and pushes to the target_image. @@ -423,7 +428,7 @@ def build_python_component(component_func, staging_gcs_path, target_image, build build_image (bool): whether to build the image or not. Default is True. timeout (int): the timeout for the image build(in secs), default is 600 seconds namespace (str): the namespace within which to run the kubernetes kaniko job, default is "kubeflow" - dependency (dict): a dictionary with key as the package name and value as the DependencyVersion, default is empty + dependency (list): a list of VersionedDependency, which includes the package name and versions, default is empty Raises: ValueError: The function is not decorated with python_component decorator diff --git a/sdk/python/tests/compiler/component_builder_test.py b/sdk/python/tests/compiler/component_builder_test.py index 3e7e82efdb5..05dd926ba51 100644 --- a/sdk/python/tests/compiler/component_builder_test.py +++ b/sdk/python/tests/compiler/component_builder_test.py @@ -16,7 +16,7 @@ from kfp.compiler._component_builder import DockerfileHelper from kfp.compiler._component_builder import CodeGenerator from kfp.compiler._component_builder import ImageBuilder -from kfp.compiler._component_builder import DependencyVersion +from kfp.compiler._component_builder import VersionedDependency from kfp.compiler._component_builder import DependencyHelper import os @@ -52,29 +52,43 @@ def test_upload_gcs_path(self): os.remove(temp_file) os.remove(temp_downloaded_file) -class TestDependencyVersion(unittest.TestCase): +class TestVersionedDependency(unittest.TestCase): def test_version(self): """ test version overrides min_version and max_version """ - version = DependencyVersion(version='0.3.0', min_version='0.1.0', max_version='0.4.0') + version = VersionedDependency(name='tensorflow', version='0.3.0', min_version='0.1.0', max_version='0.4.0') self.assertTrue(version.min_version == '0.3.0') self.assertTrue(version.max_version == '0.3.0') self.assertTrue(version.has_versions()) self.assertTrue(version.has_min_version()) self.assertTrue(version.has_max_version()) + self.assertTrue(version.name == 'tensorflow') def test_minmax_version(self): """ test if min_version and max_version are configured when version is not given """ - version = DependencyVersion(min_version='0.1.0', max_version='0.4.0') + version = VersionedDependency(name='tensorflow', min_version='0.1.0', max_version='0.4.0') self.assertTrue(version.min_version == '0.1.0') self.assertTrue(version.max_version == '0.4.0') self.assertTrue(version.has_versions()) self.assertTrue(version.has_min_version()) self.assertTrue(version.has_max_version()) + def test_min_or_max_version(self): + """ test if min_version and max_version are configured when version is not given """ + version = VersionedDependency(name='tensorflow', min_version='0.1.0') + self.assertTrue(version.min_version == '0.1.0') + self.assertTrue(version.has_versions()) + self.assertTrue(version.has_min_version()) + self.assertFalse(version.has_max_version()) + version = VersionedDependency(name='tensorflow', max_version='0.3.0') + self.assertTrue(version.max_version == '0.3.0') + self.assertTrue(version.has_versions()) + self.assertTrue(version.has_max_version()) + self.assertFalse(version.has_min_version()) + def test_no_version(self): """ test the no version scenario """ - version = DependencyVersion() + version = VersionedDependency(name='tensorflow') self.assertFalse(version.has_min_version()) self.assertFalse(version.has_max_version()) self.assertFalse(version.has_versions()) @@ -89,8 +103,8 @@ def test_generate_requirement(self): temp_file = os.path.join(test_data_dir, 'test_requirements.tmp') dependency_helper = DependencyHelper() - dependency_helper.add_python_package(name='tensorflow', version=DependencyVersion(min_version='0.10.0', max_version='0.11.0')) - dependency_helper.add_python_package(name='kubernetes', version=DependencyVersion(min_version='0.6.0')) + dependency_helper.add_python_package(version=VersionedDependency(name='tensorflow', min_version='0.10.0', max_version='0.11.0')) + dependency_helper.add_python_package(version=VersionedDependency(name='kubernetes', min_version='0.6.0')) dependency_helper.generate_pip_requirements(temp_file) golden_requirement_payload = '''\ @@ -201,8 +215,8 @@ def test_prepare_docker_with_py_and_dependency(self): # check docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile', gcs_path=gcs_tar_path) dependencies = { - 'tensorflow': DependencyVersion(min_version='0.10.0', max_version='0.11.0'), - 'kubernetes': DependencyVersion(min_version='0.6.0'), + VersionedDependency(name='tensorflow', min_version='0.10.0', max_version='0.11.0'), + VersionedDependency(name='kubernetes', min_version='0.6.0'), } docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0', dependency=dependencies) GCSHelper.download_gcs_blob(downloaded_tarball, gcs_tar_path) diff --git a/sdk/python/tests/compiler/main.py b/sdk/python/tests/compiler/main.py index 598f75551cf..710cedc00bc 100644 --- a/sdk/python/tests/compiler/main.py +++ b/sdk/python/tests/compiler/main.py @@ -23,8 +23,7 @@ if __name__ == '__main__': suite = unittest.TestSuite() suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(compiler_tests)) - #TODO: enable the test once a gcs mock class is created. - #suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test)) + suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test)) runner = unittest.TextTestRunner() if not runner.run(suite).wasSuccessful(): sys.exit(1) From ed4c21c16951918cf9340653da18ab98fd3afa4d Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Tue, 13 Nov 2018 10:57:19 -0800 Subject: [PATCH 06/11] disable the component image build, this is done in another PR --- sdk/python/tests/compiler/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/python/tests/compiler/main.py b/sdk/python/tests/compiler/main.py index 710cedc00bc..598f75551cf 100644 --- a/sdk/python/tests/compiler/main.py +++ b/sdk/python/tests/compiler/main.py @@ -23,7 +23,8 @@ if __name__ == '__main__': suite = unittest.TestSuite() suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(compiler_tests)) - suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test)) + #TODO: enable the test once a gcs mock class is created. + #suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test)) runner = unittest.TextTestRunner() if not runner.run(suite).wasSuccessful(): sys.exit(1) From 47a51a3a4317443d98a5a47cbba5612add3e59f9 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Tue, 13 Nov 2018 15:26:35 -0800 Subject: [PATCH 07/11] update the version name to dependency --- sdk/python/kfp/compiler/_component_builder.py | 6 +++--- sdk/python/tests/compiler/component_builder_test.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sdk/python/kfp/compiler/_component_builder.py b/sdk/python/kfp/compiler/_component_builder.py index c513c91e091..b48e5a05c3b 100644 --- a/sdk/python/kfp/compiler/_component_builder.py +++ b/sdk/python/kfp/compiler/_component_builder.py @@ -112,7 +112,7 @@ def __init__(self): def python_packages(self): return self._dependency[self._PYTHON_PACKAGE] - def add_python_package(self, version=VersionedDependency(name='default'), override=True): + def add_python_package(self, dependency, override=True): """ add_single_python_package adds a dependency for the python package Args: @@ -121,9 +121,9 @@ def add_python_package(self, version=VersionedDependency(name='default'), overri if not specified, the default is resolved automatically by the pip system. override: whether to override the version if already existing in the dependency. """ - if version.name in self.python_packages and not override: + if dependency.name in self.python_packages and not override: return - self.python_packages[version.name] = version + self.python_packages[dependency.name] = dependency def generate_pip_requirements(self, target_file): """ write the python packages to a requirement file diff --git a/sdk/python/tests/compiler/component_builder_test.py b/sdk/python/tests/compiler/component_builder_test.py index 05dd926ba51..a7d4c81db64 100644 --- a/sdk/python/tests/compiler/component_builder_test.py +++ b/sdk/python/tests/compiler/component_builder_test.py @@ -103,8 +103,8 @@ def test_generate_requirement(self): temp_file = os.path.join(test_data_dir, 'test_requirements.tmp') dependency_helper = DependencyHelper() - dependency_helper.add_python_package(version=VersionedDependency(name='tensorflow', min_version='0.10.0', max_version='0.11.0')) - dependency_helper.add_python_package(version=VersionedDependency(name='kubernetes', min_version='0.6.0')) + dependency_helper.add_python_package(dependency=VersionedDependency(name='tensorflow', min_version='0.10.0', max_version='0.11.0')) + dependency_helper.add_python_package(dependency=VersionedDependency(name='kubernetes', min_version='0.6.0')) dependency_helper.generate_pip_requirements(temp_file) golden_requirement_payload = '''\ From 4f464abd41fb3e770ad6f2c5e296712c80081729 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Thu, 15 Nov 2018 11:34:08 -0800 Subject: [PATCH 08/11] delete unuseful tests and add more useful unit tests --- .../tests/compiler/component_builder_test.py | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/sdk/python/tests/compiler/component_builder_test.py b/sdk/python/tests/compiler/component_builder_test.py index a7d4c81db64..1a7ed105484 100644 --- a/sdk/python/tests/compiler/component_builder_test.py +++ b/sdk/python/tests/compiler/component_builder_test.py @@ -60,8 +60,6 @@ def test_version(self): self.assertTrue(version.min_version == '0.3.0') self.assertTrue(version.max_version == '0.3.0') self.assertTrue(version.has_versions()) - self.assertTrue(version.has_min_version()) - self.assertTrue(version.has_max_version()) self.assertTrue(version.name == 'tensorflow') def test_minmax_version(self): @@ -70,21 +68,15 @@ def test_minmax_version(self): self.assertTrue(version.min_version == '0.1.0') self.assertTrue(version.max_version == '0.4.0') self.assertTrue(version.has_versions()) - self.assertTrue(version.has_min_version()) - self.assertTrue(version.has_max_version()) def test_min_or_max_version(self): """ test if min_version and max_version are configured when version is not given """ version = VersionedDependency(name='tensorflow', min_version='0.1.0') self.assertTrue(version.min_version == '0.1.0') self.assertTrue(version.has_versions()) - self.assertTrue(version.has_min_version()) - self.assertFalse(version.has_max_version()) version = VersionedDependency(name='tensorflow', max_version='0.3.0') self.assertTrue(version.max_version == '0.3.0') self.assertTrue(version.has_versions()) - self.assertTrue(version.has_max_version()) - self.assertFalse(version.has_min_version()) def test_no_version(self): """ test the no version scenario """ @@ -110,6 +102,28 @@ def test_generate_requirement(self): golden_requirement_payload = '''\ tensorflow >= 0.10.0, <= 0.11.0 kubernetes >= 0.6.0 +''' + with open(temp_file, 'r') as f: + target_requirement_payload = f.read() + self.assertEqual(target_requirement_payload, golden_requirement_payload) + os.remove(temp_file) + + def test_add_python_package(self): + """ Test add_python_package """ + + # prepare + test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata') + temp_file = os.path.join(test_data_dir, 'test_requirements.tmp') + + dependency_helper = DependencyHelper() + dependency_helper.add_python_package(dependency=VersionedDependency(name='tensorflow', min_version='0.10.0', max_version='0.11.0')) + dependency_helper.add_python_package(dependency=VersionedDependency(name='kubernetes', min_version='0.6.0')) + dependency_helper.add_python_package(dependency=VersionedDependency(name='tensorflow', min_version='0.12.0'), override=True) + dependency_helper.add_python_package(dependency=VersionedDependency(name='kubernetes', min_version='0.8.0'), override=False) + dependency_helper.generate_pip_requirements(temp_file) + golden_requirement_payload = '''\ +tensorflow >= 0.12.0 +kubernetes >= 0.6.0 ''' with open(temp_file, 'r') as f: target_requirement_payload = f.read() From 8839344143249230c2085836486b9d35d21e6865 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Fri, 16 Nov 2018 10:00:28 -0800 Subject: [PATCH 09/11] add unit tests --- sdk/python/tests/compiler/component_builder_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdk/python/tests/compiler/component_builder_test.py b/sdk/python/tests/compiler/component_builder_test.py index 1a7ed105484..13ede0e4d9b 100644 --- a/sdk/python/tests/compiler/component_builder_test.py +++ b/sdk/python/tests/compiler/component_builder_test.py @@ -120,16 +120,19 @@ def test_add_python_package(self): dependency_helper.add_python_package(dependency=VersionedDependency(name='kubernetes', min_version='0.6.0')) dependency_helper.add_python_package(dependency=VersionedDependency(name='tensorflow', min_version='0.12.0'), override=True) dependency_helper.add_python_package(dependency=VersionedDependency(name='kubernetes', min_version='0.8.0'), override=False) + dependency_helper.add_python_package(dependency=VersionedDependency(name='pytorch', version='0.3.0')) dependency_helper.generate_pip_requirements(temp_file) golden_requirement_payload = '''\ tensorflow >= 0.12.0 kubernetes >= 0.6.0 +pytorch >= 0.3.0, <= 0.3.0 ''' with open(temp_file, 'r') as f: target_requirement_payload = f.read() self.assertEqual(target_requirement_payload, golden_requirement_payload) os.remove(temp_file) + class TestDockerfileHelper(unittest.TestCase): def test_wrap_files_in_tarball(self): From 15cb834c9a90080bc9bc7c5f05a7930a359bd731 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Fri, 16 Nov 2018 10:11:02 -0800 Subject: [PATCH 10/11] fix some unit tests --- .../tests/compiler/component_builder_test.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sdk/python/tests/compiler/component_builder_test.py b/sdk/python/tests/compiler/component_builder_test.py index d1484d308de..02eca774733 100644 --- a/sdk/python/tests/compiler/component_builder_test.py +++ b/sdk/python/tests/compiler/component_builder_test.py @@ -202,26 +202,25 @@ def test_prepare_docker_with_py_and_dependency(self): # prepare test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata') python_filepath = os.path.join(test_data_dir, 'basic.py') - downloaded_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz') - gcs_tar_path = os.path.join(GCS_BASE, 'test_docker.tar.gz') + local_tarball_path = os.path.join(test_data_dir, 'test_docker.tar.gz') # check - docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile', gcs_path=gcs_tar_path) + docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile') dependencies = { VersionedDependency(name='tensorflow', min_version='0.10.0', max_version='0.11.0'), VersionedDependency(name='kubernetes', min_version='0.6.0'), } - docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0', dependency=dependencies) - GCSHelper.download_gcs_blob(downloaded_tarball, gcs_tar_path) - temp_tarball_handler = tarfile.open(downloaded_tarball) + docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath, + base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0', + local_tarball_path=local_tarball_path, dependency=dependencies) + temp_tarball_handler = tarfile.open(local_tarball_path) temp_files = temp_tarball_handler.getmembers() self.assertTrue(len(temp_files) == 3) for temp_file in temp_files: self.assertTrue(temp_file.name in ['dockerfile', 'main.py', 'requirements.txt']) # clean up - os.remove(downloaded_tarball) - GCSHelper.remove_gcs_blob(gcs_tar_path) + os.remove(local_tarball_path) def test_prepare_docker_tarball(self): """ Test the whole prepare docker tarball """ From 98129e37660d9b66091469d4766c027d2795f1eb Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Fri, 16 Nov 2018 17:36:55 -0800 Subject: [PATCH 11/11] fix bug in DependencyHelper --- sdk/python/kfp/compiler/_component_builder.py | 6 +++--- sdk/python/tests/compiler/component_builder_test.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sdk/python/kfp/compiler/_component_builder.py b/sdk/python/kfp/compiler/_component_builder.py index 9066fba27d7..8dc870df3b4 100644 --- a/sdk/python/kfp/compiler/_component_builder.py +++ b/sdk/python/kfp/compiler/_component_builder.py @@ -133,10 +133,10 @@ def generate_pip_requirements(self, target_file): for name, version in self.python_packages.items(): version_str = '' if version.has_min_version(): - version_str += ' >= ' + version.min_version + version_str += ' >= ' + version.min_version + ',' if version.has_max_version(): - version_str += ', <= ' + version.max_version - f.write(name + version_str + '\n') + version_str += ' <= ' + version.max_version + ',' + f.write(name + version_str.rstrip(',') + '\n') class DockerfileHelper(object): """ Dockerfile Helper generates a tarball with dockerfile, ready for docker build diff --git a/sdk/python/tests/compiler/component_builder_test.py b/sdk/python/tests/compiler/component_builder_test.py index 02eca774733..af2446e80ad 100644 --- a/sdk/python/tests/compiler/component_builder_test.py +++ b/sdk/python/tests/compiler/component_builder_test.py @@ -74,11 +74,13 @@ def test_generate_requirement(self): dependency_helper = DependencyHelper() dependency_helper.add_python_package(dependency=VersionedDependency(name='tensorflow', min_version='0.10.0', max_version='0.11.0')) dependency_helper.add_python_package(dependency=VersionedDependency(name='kubernetes', min_version='0.6.0')) + dependency_helper.add_python_package(dependency=VersionedDependency(name='pytorch', max_version='0.3.0')) dependency_helper.generate_pip_requirements(temp_file) golden_requirement_payload = '''\ tensorflow >= 0.10.0, <= 0.11.0 kubernetes >= 0.6.0 +pytorch <= 0.3.0 ''' with open(temp_file, 'r') as f: target_requirement_payload = f.read()