add support for dependencies in the component image building #219

Merged
merged 13 commits · Nov 17, 2018
2 changes: 1 addition & 1 deletion sdk/python/kfp/compiler/__init__.py
@@ -14,4 +14,4 @@


from .compiler import Compiler
from ._component_builder import build_python_component, build_docker_image
from ._component_builder import build_python_component, build_docker_image, VersionedDependency
118 changes: 108 additions & 10 deletions sdk/python/kfp/compiler/_component_builder.py
@@ -19,6 +19,7 @@
import re
import tempfile
import logging
from collections import OrderedDict
import sys
from google.cloud import storage
from pathlib import PurePath, Path
@@ -59,20 +60,106 @@ def download_gcs_blob(local_path, gcs_path):
blob = bucket.blob(gcs_blob)
blob.download_to_filename(local_path)

class VersionedDependency(object):
""" DependencyVersion specifies the versions """
def __init__(self, name, version=None, min_version=None, max_version=None):
""" if version is specified, no need for min_version or max_version;
if both are specified, version is adopted """
self._name = name
if version is not None:
self._min_version = version
self._max_version = version
else:
self._min_version = min_version
self._max_version = max_version

@property
def name(self):
return self._name

@property
def min_version(self):
return self._min_version

@min_version.setter
def min_version(self, min_version):
self._min_version = min_version

def has_min_version(self):
return self._min_version is not None

@property
def max_version(self):
return self._max_version

@max_version.setter
def max_version(self, max_version):
self._max_version = max_version

def has_max_version(self):
return self._max_version is not None

def has_versions(self):
return (self.has_min_version()) or (self.has_max_version())
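As an aside, a minimal usage sketch of these rules (package names and versions are illustrative): a plain version pins both bounds, while min_version/max_version set an open or closed range.

pinned = VersionedDependency(name='tensorflow', version='1.10.0')
assert pinned.min_version == '1.10.0' and pinned.max_version == '1.10.0'
ranged = VersionedDependency(name='kubernetes', min_version='0.6.0')
assert ranged.has_min_version() and not ranged.has_max_version()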


class DependencyHelper(object):
""" DependencyHelper manages software dependency information """
def __init__(self):
self._PYTHON_PACKAGE = 'PYTHON_PACKAGE'
self._dependency = {self._PYTHON_PACKAGE:OrderedDict()}

@property
def python_packages(self):
return self._dependency[self._PYTHON_PACKAGE]

def add_python_package(self, dependency, override=True):
""" add_single_python_package adds a dependency for the python package

Args:
name: package name
version: it could be a specific version(1.10.0), or a range(>=1.0,<=2.0)
if not specified, the default is resolved automatically by the pip system.
override: whether to override the version if already existing in the dependency.
"""
if dependency.name in self.python_packages and not override:
return
self.python_packages[dependency.name] = dependency

def generate_pip_requirements(self, target_file):
""" write the python packages to a requirement file
the generated file follows the order of which the packages are added """
with open(target_file, 'w') as f:
for name, version in self.python_packages.items():
version_str = ''
if version.has_min_version():
version_str += ' >= ' + version.min_version + ','
if version.has_max_version():
version_str += ' <= ' + version.max_version + ','
f.write(name + version_str.rstrip(',') + '\n')
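Taken together, a short sketch of the helper end to end (the target path is illustrative); note that a pinned dependency renders as a closed range in the generated file:

helper = DependencyHelper()
helper.add_python_package(VersionedDependency(name='tensorflow', min_version='0.10.0', max_version='0.11.0'))
helper.add_python_package(VersionedDependency(name='pytorch', version='0.3.0'))
helper.generate_pip_requirements('/tmp/requirements.txt')
# /tmp/requirements.txt now reads:
#   tensorflow >= 0.10.0, <= 0.11.0
#   pytorch >= 0.3.0, <= 0.3.0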

class DockerfileHelper(object):
""" Dockerfile Helper generates a tarball with dockerfile, ready for docker build
arc_dockerfile_name: dockerfile filename that is stored in the tarball """

def __init__(self, arc_dockerfile_name):
self._arc_dockerfile_name = arc_dockerfile_name
self._ARC_REQUIREMENT_FILE = 'requirements.txt'

def _generate_pip_requirement(self, dependency, requirement_filepath):
dependency_helper = DependencyHelper()
for version in dependency:
dependency_helper.add_python_package(version)
dependency_helper.generate_pip_requirements(requirement_filepath)

def _generate_dockerfile_with_py(self, target_file, base_image, python_filepath):
def _generate_dockerfile_with_py(self, target_file, base_image, python_filepath, has_requirement_file):
""" _generate_docker_file generates a simple dockerfile with the python path """
with open(target_file, 'w') as f:
f.write('FROM ' + base_image + '\n')
f.write('RUN apt-get update -y && apt-get install --no-install-recommends -y -q python3 python3-pip python3-setuptools\n')
f.write('RUN pip3 install fire\n')
if has_requirement_file:
f.write('RUN pip3 install -r ' + self._ARC_REQUIREMENT_FILE + '\n')
f.write('ADD ' + python_filepath + " /ml/" + '\n')
f.write('ENTRYPOINT ["python3", "/ml/' + python_filepath + '"]')
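For reference, a sketch of a call to this helper (target path and base image are hypothetical; the tests below exercise it the same way):

helper = DockerfileHelper(arc_dockerfile_name='dockerfile')
helper._generate_dockerfile_with_py(
    target_file='/tmp/dockerfile',                      # hypothetical path
    base_image='gcr.io/my-project/tensorflow:1.10.0',   # hypothetical image
    python_filepath='main.py',
    has_requirement_file=True)
# With has_requirement_file=True, the emitted dockerfile gains a
# "RUN pip3 install -r requirements.txt" line between the fire install
# and the ADD line.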

@@ -85,13 +172,21 @@ def _wrap_files_in_tarball(self, tarball_path, files={}):
for key, value in files.items():
tarball.add(value, arcname=key)

def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image, local_tarball_path):
def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image, local_tarball_path, dependency=None):
""" prepare_docker_tarball is the API to generate dockerfile and prepare the tarball with python scripts """
with tempfile.TemporaryDirectory() as local_build_dir:
has_requirement_file = False
local_requirement_path = os.path.join(local_build_dir, self._ARC_REQUIREMENT_FILE)
if dependency is not None and len(dependency) != 0:
self._generate_pip_requirement(dependency, local_requirement_path)
has_requirement_file = True
local_dockerfile_path = os.path.join(local_build_dir, self._arc_dockerfile_name)
self._generate_dockerfile_with_py(local_dockerfile_path, base_image, arc_python_filename)
self._wrap_files_in_tarball(local_tarball_path, {self._arc_dockerfile_name:local_dockerfile_path,
arc_python_filename:python_filepath})
self._generate_dockerfile_with_py(local_dockerfile_path, base_image, arc_python_filename, has_requirement_file)
file_lists = {self._arc_dockerfile_name:local_dockerfile_path,
arc_python_filename:python_filepath}
if has_requirement_file:
file_lists[self._ARC_REQUIREMENT_FILE] = local_requirement_path
self._wrap_files_in_tarball(local_tarball_path, file_lists)
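A sketch of the new dependency path end to end (all paths and images are placeholders): with a non-empty dependency list, the tarball carries three members instead of two.

docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile')
docker_helper.prepare_docker_tarball_with_py(
    arc_python_filename='main.py',
    python_filepath='/path/to/main.py',         # placeholder script
    base_image='gcr.io/my-project/base:1.0',    # placeholder image
    local_tarball_path='/tmp/docker.tmp.tar.gz',  # placeholder output
    dependency=[VersionedDependency(name='kubernetes', min_version='0.6.0')])
# /tmp/docker.tmp.tar.gz now contains: dockerfile, main.py, requirements.txt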

def prepare_docker_tarball(self, dockerfile_path, local_tarball_path):
""" prepare_docker_tarball is the API to prepare a tarball with the dockerfile """
@@ -232,7 +327,7 @@ def _generate_entrypoint(self, component_func):
complete_component_code = dedecorated_component_src + '\n' + wrapper_code + '\n' + codegen.end()
return complete_component_code

def build_image_from_func(self, component_func, namespace, base_image, timeout):
def build_image_from_func(self, component_func, namespace, base_image, timeout, dependency):
""" build_image builds an image for the given python function"""

# Generate entrypoint and serialization python codes
@@ -249,7 +344,8 @@ def build_image_from_func(self, component_func, namespace, base_image, timeout):
local_tarball_file = os.path.join(local_build_dir, 'docker.tmp.tar.gz')
docker_helper.prepare_docker_tarball_with_py(python_filepath=local_python_filepath,
arc_python_filename=self._arc_python_filepath,
base_image=base_image, local_tarball_path=local_tarball_file)
base_image=base_image, local_tarball_path=local_tarball_file,
dependency=dependency)
GCSHelper.upload_gcs_file(local_tarball_file, self._gcs_path)

kaniko_spec = self._generate_kaniko_spec(namespace=namespace,
@@ -343,7 +439,7 @@ def _generate_pythonop(component_func, target_image, target_component_file=None)

return _create_task_factory_from_component_dict(component_artifact)

def build_python_component(component_func, target_image, base_image=None, staging_gcs_path=None, build_image=True, timeout=600, namespace='kubeflow', target_component_file=None):
def build_python_component(component_func, target_image, base_image=None, dependency=[], staging_gcs_path=None, build_image=True, timeout=600, namespace='kubeflow', target_component_file=None):
""" build_component automatically builds a container image for the component_func
based on the base_image and pushes to the target_image.

@@ -352,9 +448,11 @@ def build_python_component(component_func, target_image, base_image=None, stagin
base_image (str): Docker image to use as a base image
target_image (str): Full URI to push the target image
staging_gcs_path (str): GCS blob that can store temporary build files
build_image (bool): whether to build the image or not. Default is True.
timeout (int): the timeout for the image build (in secs), default is 600 seconds
namespace (str): the namespace within which to run the kubernetes kaniko job, default is "kubeflow"
dependency (list): a list of VersionedDependency, which includes the package name and versions, default is empty

Raises:
ValueError: The function is not decorated with python_component decorator
@@ -382,7 +480,7 @@ def build_python_component(component_func, target_image, base_image=None, stagin
target_image)
builder = ImageBuilder(gcs_base=staging_gcs_path, target_image=target_image)
builder.build_image_from_func(component_func, namespace=namespace,
base_image=base_image, timeout=timeout)
base_image=base_image, timeout=timeout, dependency=dependency)
logging.info('Build component complete.')
return _generate_pythonop(component_func, target_image, target_component_file)
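From the caller's side, the new argument threads through like this (a hedged sketch; the image URIs, GCS path, and component function are placeholders, and component_func must carry the python_component decorator as the docstring requires):

from kfp.compiler import build_python_component, VersionedDependency

op = build_python_component(
    component_func=my_component_func,                   # placeholder, decorated with python_component
    target_image='gcr.io/my-project/component:latest',  # placeholder
    base_image='gcr.io/my-project/base:latest',         # placeholder
    staging_gcs_path='gs://my-bucket/staging',          # placeholder
    dependency=[VersionedDependency(name='tensorflow', min_version='1.10.0')])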

129 changes: 126 additions & 3 deletions sdk/python/tests/compiler/component_builder_test.py
@@ -16,16 +16,102 @@
from kfp.compiler._component_builder import DockerfileHelper
from kfp.compiler._component_builder import CodeGenerator
from kfp.compiler._component_builder import ImageBuilder
from kfp.compiler._component_builder import VersionedDependency
from kfp.compiler._component_builder import DependencyHelper

import os
import unittest
import yaml
import tarfile
from pathlib import Path
import inspect
from collections import OrderedDict

GCS_BASE = 'gs://kfp-testing/'

class TestVersionedDependency(unittest.TestCase):

def test_version(self):
""" test version overrides min_version and max_version """
version = VersionedDependency(name='tensorflow', version='0.3.0', min_version='0.1.0', max_version='0.4.0')
self.assertTrue(version.min_version == '0.3.0')
self.assertTrue(version.max_version == '0.3.0')
self.assertTrue(version.has_versions())
self.assertTrue(version.name == 'tensorflow')

def test_minmax_version(self):
""" test if min_version and max_version are configured when version is not given """
version = VersionedDependency(name='tensorflow', min_version='0.1.0', max_version='0.4.0')
self.assertTrue(version.min_version == '0.1.0')
self.assertTrue(version.max_version == '0.4.0')
self.assertTrue(version.has_versions())

def test_min_or_max_version(self):
""" test if min_version and max_version are configured when version is not given """
version = VersionedDependency(name='tensorflow', min_version='0.1.0')
self.assertTrue(version.min_version == '0.1.0')
self.assertTrue(version.has_versions())
version = VersionedDependency(name='tensorflow', max_version='0.3.0')
self.assertTrue(version.max_version == '0.3.0')
self.assertTrue(version.has_versions())

def test_no_version(self):
""" test the no version scenario """
version = VersionedDependency(name='tensorflow')
self.assertFalse(version.has_min_version())
self.assertFalse(version.has_max_version())
self.assertFalse(version.has_versions())

class TestDependencyHelper(unittest.TestCase):

def test_generate_requirement(self):
""" Test generating requirement file """

# prepare
test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
temp_file = os.path.join(test_data_dir, 'test_requirements.tmp')

dependency_helper = DependencyHelper()
dependency_helper.add_python_package(dependency=VersionedDependency(name='tensorflow', min_version='0.10.0', max_version='0.11.0'))
dependency_helper.add_python_package(dependency=VersionedDependency(name='kubernetes', min_version='0.6.0'))
dependency_helper.add_python_package(dependency=VersionedDependency(name='pytorch', max_version='0.3.0'))
dependency_helper.generate_pip_requirements(temp_file)

golden_requirement_payload = '''\
tensorflow >= 0.10.0, <= 0.11.0
kubernetes >= 0.6.0
pytorch <= 0.3.0
'''
with open(temp_file, 'r') as f:
target_requirement_payload = f.read()
self.assertEqual(target_requirement_payload, golden_requirement_payload)
os.remove(temp_file)

def test_add_python_package(self):
""" Test add_python_package """

# prepare
test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
temp_file = os.path.join(test_data_dir, 'test_requirements.tmp')

dependency_helper = DependencyHelper()
dependency_helper.add_python_package(dependency=VersionedDependency(name='tensorflow', min_version='0.10.0', max_version='0.11.0'))
dependency_helper.add_python_package(dependency=VersionedDependency(name='kubernetes', min_version='0.6.0'))
dependency_helper.add_python_package(dependency=VersionedDependency(name='tensorflow', min_version='0.12.0'), override=True)
dependency_helper.add_python_package(dependency=VersionedDependency(name='kubernetes', min_version='0.8.0'), override=False)
dependency_helper.add_python_package(dependency=VersionedDependency(name='pytorch', version='0.3.0'))
dependency_helper.generate_pip_requirements(temp_file)
golden_requirement_payload = '''\
tensorflow >= 0.12.0
kubernetes >= 0.6.0
pytorch >= 0.3.0, <= 0.3.0
'''
with open(temp_file, 'r') as f:
target_requirement_payload = f.read()
self.assertEqual(target_requirement_payload, golden_requirement_payload)
os.remove(temp_file)


class TestDockerfileHelper(unittest.TestCase):

def test_wrap_files_in_tarball(self):
@@ -62,19 +148,30 @@ def test_generate_dockerfile(self):
# prepare
test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
target_dockerfile = os.path.join(test_data_dir, 'component.temp.dockerfile')
golden_dockerfile_payload = '''\
golden_dockerfile_payload_one = '''\
FROM gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0
RUN apt-get update -y && apt-get install --no-install-recommends -y -q python3 python3-pip python3-setuptools
RUN pip3 install fire
ADD main.py /ml/
ENTRYPOINT ["python3", "/ml/main.py"]'''
golden_dockerfile_payload_two = '''\
FROM gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0
RUN apt-get update -y && apt-get install --no-install-recommends -y -q python3 python3-pip python3-setuptools
RUN pip3 install fire
RUN pip3 install -r requirements.txt
ADD main.py /ml/
ENTRYPOINT ["python3", "/ml/main.py"]'''

# check
docker_helper = DockerfileHelper(arc_dockerfile_name=target_dockerfile)
docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0', python_filepath='main.py')
docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0', python_filepath='main.py', has_requirement_file=False)
with open(target_dockerfile, 'r') as f:
target_dockerfile_payload = f.read()
self.assertEqual(target_dockerfile_payload, golden_dockerfile_payload)
self.assertEqual(target_dockerfile_payload, golden_dockerfile_payload_one)
docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0', python_filepath='main.py', has_requirement_file=True)
with open(target_dockerfile, 'r') as f:
target_dockerfile_payload = f.read()
self.assertEqual(target_dockerfile_payload, golden_dockerfile_payload_two)

# clean up
os.remove(target_dockerfile)
@@ -101,6 +198,32 @@ def test_prepare_docker_with_py(self):
# clean up
os.remove(generated_tarball)

def test_prepare_docker_with_py_and_dependency(self):
""" Test the whole prepare docker from python function and dependencies """

# prepare
test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
python_filepath = os.path.join(test_data_dir, 'basic.py')
local_tarball_path = os.path.join(test_data_dir, 'test_docker.tar.gz')

# check
docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile')
dependencies = {
VersionedDependency(name='tensorflow', min_version='0.10.0', max_version='0.11.0'),
VersionedDependency(name='kubernetes', min_version='0.6.0'),
}
docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath,
base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0',
local_tarball_path=local_tarball_path, dependency=dependencies)
temp_tarball_handler = tarfile.open(local_tarball_path)
temp_files = temp_tarball_handler.getmembers()
self.assertTrue(len(temp_files) == 3)
for temp_file in temp_files:
self.assertTrue(temp_file.name in ['dockerfile', 'main.py', 'requirements.txt'])

# clean up
os.remove(local_tarball_path)

def test_prepare_docker_tarball(self):
""" Test the whole prepare docker tarball """
