Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

component build support for both python2 and python3 #730

Merged
merged 11 commits into from
Feb 25, 2019
61 changes: 47 additions & 14 deletions sdk/python/kfp/compiler/_component_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,16 +145,34 @@ def _generate_pip_requirement(self, dependency, requirement_filepath):
dependency_helper.add_python_package(version)
dependency_helper.generate_pip_requirements(requirement_filepath)

def _generate_dockerfile_with_py(self, target_file, base_image, python_filepath, has_requirement_file):
""" _generate_docker_file generates a simple dockerfile with the python path """
def _generate_dockerfile_with_py(self, target_file, base_image, python_filepath, has_requirement_file, python_version):
gaoning777 marked this conversation as resolved.
Show resolved Hide resolved
""" _generate_docker_file generates a simple dockerfile with the python path
args:
target_file (str): target file name for the dockerfile.
base_image (str): the base image name.
python_filepath (str): the path of the python file that is copied to the docker image.
has_requirement_file (bool): whether it has a requirement file or not.
python_version (str): choose python2 or python3
"""
if python_version not in ['python2', 'python3']:
raise ValueError('python_version has to be either python2 or python3')
with open(target_file, 'w') as f:
f.write('FROM ' + base_image + '\n')
f.write('RUN apt-get update -y && apt-get install --no-install-recommends -y -q python3 python3-pip python3-setuptools\n')
if python_version is 'python3':
f.write('RUN apt-get update -y && apt-get install --no-install-recommends -y -q python3 python3-pip python3-setuptools\n')
else:
f.write('RUN apt-get update -y && apt-get install --no-install-recommends -y -q python python-pip python-setuptools\n')
if has_requirement_file:
f.write('ADD ' + self._ARC_REQUIREMENT_FILE + ' /ml/\n')
f.write('RUN pip3 install -r /ml/' + self._ARC_REQUIREMENT_FILE + '\n')
if python_version is 'python3':
f.write('RUN pip3 install -r /ml/' + self._ARC_REQUIREMENT_FILE + '\n')
else:
f.write('RUN pip install -r /ml/' + self._ARC_REQUIREMENT_FILE + '\n')
gaoning777 marked this conversation as resolved.
Show resolved Hide resolved
f.write('ADD ' + python_filepath + " /ml/" + '\n')
f.write('ENTRYPOINT ["python3", "/ml/' + python_filepath + '"]')
if python_version is 'python3':
f.write('ENTRYPOINT ["python3", "/ml/' + python_filepath + '"]')
else:
f.write('ENTRYPOINT ["python", "/ml/' + python_filepath + '"]')
gaoning777 marked this conversation as resolved.
Show resolved Hide resolved

def _wrap_files_in_tarball(self, tarball_path, files={}):
""" _wrap_files_in_tarball creates a tarball for all the input files
Expand All @@ -165,16 +183,21 @@ def _wrap_files_in_tarball(self, tarball_path, files={}):
for key, value in files.items():
tarball.add(value, arcname=key)

def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image, local_tarball_path, dependency=None):
""" prepare_docker_tarball is the API to generate dockerfile and prepare the tarball with python scripts """
def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image, local_tarball_path, python_version, dependency=None):
""" prepare_docker_tarball is the API to generate dockerfile and prepare the tarball with python scripts
args:
python_version (str): choose python2 or python3
"""
if python_version not in ['python2', 'python3']:
raise ValueError('python_version has to be either python2 or python3')
with tempfile.TemporaryDirectory() as local_build_dir:
has_requirement_file = False
local_requirement_path = os.path.join(local_build_dir, self._ARC_REQUIREMENT_FILE)
if dependency is not None and len(dependency) != 0:
self._generate_pip_requirement(dependency, local_requirement_path)
has_requirement_file = True
local_dockerfile_path = os.path.join(local_build_dir, self._arc_dockerfile_name)
self._generate_dockerfile_with_py(local_dockerfile_path, base_image, arc_python_filename, has_requirement_file)
self._generate_dockerfile_with_py(local_dockerfile_path, base_image, arc_python_filename, has_requirement_file, python_version)
file_lists = {self._arc_dockerfile_name:local_dockerfile_path,
arc_python_filename:python_filepath}
if has_requirement_file:
Expand Down Expand Up @@ -358,8 +381,13 @@ def _build_image_from_tarball(self, local_tarball_path, namespace, timeout):
# Clean up
GCSHelper.remove_gcs_blob(self._gcs_path)

def build_image_from_func(self, component_func, namespace, base_image, timeout, dependency):
""" build_image builds an image for the given python function"""
def build_image_from_func(self, component_func, namespace, base_image, timeout, dependency, python_version='python3'):
""" build_image builds an image for the given python function
args:
python_version (str): choose python2 or python3, default is python3
"""
if python_version not in ['python2', 'python3']:
raise ValueError('python_version has to be either python2 or python3')
with tempfile.TemporaryDirectory() as local_build_dir:
# Generate entrypoint and serialization python codes
local_python_filepath = os.path.join(local_build_dir, self._arc_python_filepath)
Expand All @@ -376,6 +404,7 @@ def build_image_from_func(self, component_func, namespace, base_image, timeout,
arc_python_filename=self._arc_python_filepath,
base_image=base_image,
local_tarball_path=local_tarball_path,
python_version=python_version,
dependency=dependency)
self._build_image_from_tarball(local_tarball_path, namespace, timeout)

Expand Down Expand Up @@ -445,7 +474,7 @@ def _generate_pythonop(component_func, target_image, target_component_file=None)

return _create_task_factory_from_component_spec(component_spec)

def build_python_component(component_func, target_image, base_image=None, dependency=[], staging_gcs_path=None, build_image=True, timeout=600, namespace='kubeflow', target_component_file=None):
def build_python_component(component_func, target_image, base_image=None, dependency=[], staging_gcs_path=None, build_image=True, timeout=600, namespace='kubeflow', target_component_file=None, python_version='python3'):
""" build_component automatically builds a container image for the component_func
based on the base_image and pushes to the target_image.

Expand All @@ -459,9 +488,9 @@ def build_python_component(component_func, target_image, base_image=None, depend
timeout (int): the timeout for the image build(in secs), default is 600 seconds
namespace (str): the namespace within which to run the kubernetes kaniko job, default is "kubeflow"
dependency (list): a list of VersionedDependency, which includes the package name and versions, default is empty

python_version (str): choose python2 or python3, default is python3
Raises:
ValueError: The function is not decorated with python_component decorator
ValueError: The function is not decorated with python_component decorator or the python_version is neither python2 nor python3
"""

_configure_logger(logging.getLogger())
Expand All @@ -471,6 +500,9 @@ def build_python_component(component_func, target_image, base_image=None, depend
if target_image is None:
raise ValueError('target_image must not be None')

if python_version not in ['python2', 'python3']:
raise ValueError('python_version has to be either python2 or python3')

if build_image:
if staging_gcs_path is None:
raise ValueError('staging_gcs_path must not be None')
Expand All @@ -486,7 +518,8 @@ def build_python_component(component_func, target_image, base_image=None, depend
target_image)
builder = ImageBuilder(gcs_base=staging_gcs_path, target_image=target_image)
builder.build_image_from_func(component_func, namespace=namespace,
base_image=base_image, timeout=timeout, dependency=dependency)
base_image=base_image, timeout=timeout,
python_version=python_version, dependency=dependency)
logging.info('Build component complete.')
return _generate_pythonop(component_func, target_image, target_component_file)

Expand Down
27 changes: 23 additions & 4 deletions sdk/python/tests/compiler/component_builder_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,16 +161,34 @@ def test_generate_dockerfile(self):
ADD main.py /ml/
ENTRYPOINT ["python3", "/ml/main.py"]'''

golden_dockerfile_payload_three = '''\
FROM gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0
RUN apt-get update -y && apt-get install --no-install-recommends -y -q python python-pip python-setuptools
ADD requirements.txt /ml/
RUN pip install -r /ml/requirements.txt
ADD main.py /ml/
ENTRYPOINT ["python", "/ml/main.py"]'''
# check
docker_helper = DockerfileHelper(arc_dockerfile_name=target_dockerfile)
docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0', python_filepath='main.py', has_requirement_file=False)
docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0',
python_filepath='main.py', has_requirement_file=False, python_version='python3')
with open(target_dockerfile, 'r') as f:
target_dockerfile_payload = f.read()
self.assertEqual(target_dockerfile_payload, golden_dockerfile_payload_one)
docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0', python_filepath='main.py', has_requirement_file=True)
docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0',
python_filepath='main.py', has_requirement_file=True, python_version='python3')
with open(target_dockerfile, 'r') as f:
target_dockerfile_payload = f.read()
self.assertEqual(target_dockerfile_payload, golden_dockerfile_payload_two)
docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0',
python_filepath='main.py', has_requirement_file=True, python_version='python2')
with open(target_dockerfile, 'r') as f:
target_dockerfile_payload = f.read()
self.assertEqual(target_dockerfile_payload, golden_dockerfile_payload_three)

self.assertRaises(ValueError, docker_helper._generate_dockerfile_with_py, target_file=target_dockerfile,
base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0', python_filepath='main.py',
has_requirement_file=True, python_version='python4')

# clean up
os.remove(target_dockerfile)
Expand All @@ -187,7 +205,7 @@ def test_prepare_docker_with_py(self):
docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile')
docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath,
base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0',
local_tarball_path=local_tarball_path)
local_tarball_path=local_tarball_path, python_version='python3')
temp_tarball_handle = tarfile.open(local_tarball_path)
temp_files = temp_tarball_handle.getmembers()
self.assertTrue(len(temp_files) == 2)
Expand All @@ -213,7 +231,8 @@ def test_prepare_docker_with_py_and_dependency(self):
}
docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath,
base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0',
local_tarball_path=local_tarball_path, dependency=dependencies)
local_tarball_path=local_tarball_path, python_version='python3',
dependency=dependencies)
temp_tarball_handle = tarfile.open(local_tarball_path)
temp_files = temp_tarball_handle.getmembers()
self.assertTrue(len(temp_files) == 3)
Expand Down