From ea0b2dad8b3bab669baa7b04c3a914bbe82f9e5b Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 13 Jun 2019 23:27:59 -0700 Subject: [PATCH] SDK/Lightweight - Disabled code pickling by default I've introduced code pickling to capture dependencies in https://github.com/kubeflow/pipelines/pull/1372 Later I've discovered that there is a serious opcode incompatibility between python versions 3.5 and 3.6+. See my analysis of the issue: https://github.com/cloudpipe/cloudpickle/issues/293 Dues to this issue I decided to switch back to using source code copying by default and to continue improving it. Until we stop supporting python 3.5 (https://github.com/kubeflow/pipelines/pull/668) it's too dangerous to use code pickling by default. Code pickling can be enabled by specifying `pickle_code=True` when calling `func_to_container_op` --- sdk/python/kfp/components/_python_op.py | 54 ++++++++++++++----- sdk/python/tests/components/test_python_op.py | 10 ++-- 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/sdk/python/kfp/components/_python_op.py b/sdk/python/kfp/components/_python_op.py index 37761b710bed..c8ee4aa3a0b4 100644 --- a/sdk/python/kfp/components/_python_op.py +++ b/sdk/python/kfp/components/_python_op.py @@ -85,7 +85,30 @@ def _capture_function_code_using_cloudpickle(func, modules_to_capture: List[str] return '\n'.join(code_lines) -def _func_to_component_spec(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None) -> ComponentSpec: +def _capture_function_code_using_source_copy(func) -> str: + import inspect + + #Source code can include decorators line @python_op. Remove them + (func_code_lines, _) = inspect.getsourcelines(func) + while func_code_lines[0].lstrip().startswith('@'): #decorator + del func_code_lines[0] + + #Function might be defined in some indented scope (e.g. in another function). + #We need to handle this and properly dedent the function source code + first_line = func_code_lines[0] + indent = len(first_line) - len(first_line.lstrip()) + func_code_lines = [line[indent:] for line in func_code_lines] + + #TODO: Add support for copying the NamedTuple subclass declaration code + #Adding NamedTuple import if needed + if hasattr(inspect.signature(func).return_annotation, '_fields'): #NamedTuple + func_code_lines.insert(0, '\n') + func_code_lines.insert(0, 'from typing import NamedTuple\n') + + return ''.join(func_code_lines) #Lines retain their \n endings + + +def _func_to_component_spec(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None, pickle_code=False) -> ComponentSpec: '''Takes a self-contained python function and converts it to component Args: @@ -93,6 +116,7 @@ def _func_to_component_spec(func, extra_code='', base_image=_default_base_image, base_image: Optional. Docker image to be used as a base image for the python component. Must have python 3.5+ installed. Default is tensorflow/tensorflow:1.11.0-py3 Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised. extra_code: Optional. Python source code that gets placed before the function code. Can be used as workaround to define types used in function signature. + pickle_code: Specifies whether the function code should be captured using pickling as opposed to source code manipulation. Pickling has better support for capturing dependencies, but is sensitive to version mismatch between python in component creation environment and runtime image. modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured. ''' decorator_base_image = getattr(func, '_component_base_image', None) @@ -168,7 +192,10 @@ def annotation_to_type_struct(annotation): func_name=func.__name__ - func_code = _capture_function_code_using_cloudpickle(func, modules_to_capture) + if pickle_code: + func_code = _capture_function_code_using_cloudpickle(func, modules_to_capture) + else: + func_code = _capture_function_code_using_source_copy(func) extra_output_external_names = [name + '_file' for name in extra_output_names] @@ -245,11 +272,11 @@ def annotation_to_type_struct(annotation): return component_spec -def _func_to_component_dict(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None): - return _func_to_component_spec(func, extra_code, base_image, modules_to_capture).to_dict() +def _func_to_component_dict(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None, pickle_code=False): + return _func_to_component_spec(func, extra_code, base_image, modules_to_capture, pickle_code).to_dict() -def func_to_component_text(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None): +def func_to_component_text(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None, pickle_code=False): ''' Converts a Python function to a component definition and returns its textual representation @@ -268,15 +295,16 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised. extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature. modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured. The actual algorithm: Starting with the initial function, start traversing dependencies. If the dependecy.__module__ is in the modules_to_capture list then it's captured and it's dependencies are traversed. Otherwise the dependency is only referenced instead of capturing and its dependencies are not traversed. - + pickle_code: Specifies whether the function code should be captured using pickling as opposed to source code manipulation. Pickling has better support for capturing dependencies, but is sensitive to version mismatch between python in component creation environment and runtime image. + Returns: Textual representation of a component definition ''' - component_dict = _func_to_component_dict(func, extra_code, base_image, modules_to_capture) + component_dict = _func_to_component_dict(func, extra_code, base_image, modules_to_capture, pickle_code) return dump_yaml(component_dict) -def func_to_component_file(func, output_component_file, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None) -> None: +def func_to_component_file(func, output_component_file, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None, pickle_code=False) -> None: ''' Converts a Python function to a component definition and writes it to a file @@ -296,14 +324,15 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised. extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature. modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured. The actual algorithm: Starting with the initial function, start traversing dependencies. If the dependecy.__module__ is in the modules_to_capture list then it's captured and it's dependencies are traversed. Otherwise the dependency is only referenced instead of capturing and its dependencies are not traversed. + pickle_code: Specifies whether the function code should be captured using pickling as opposed to source code manipulation. Pickling has better support for capturing dependencies, but is sensitive to version mismatch between python in component creation environment and runtime image. ''' - component_yaml = func_to_component_text(func, extra_code, base_image, modules_to_capture) - + component_yaml = func_to_component_text(func, extra_code, base_image, modules_to_capture, pickle_code) + Path(output_component_file).write_text(component_yaml) -def func_to_container_op(func, output_component_file=None, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None): +def func_to_container_op(func, output_component_file=None, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None, pickle_code=False): ''' Converts a Python function to a component and returns a task (ContainerOp) factory @@ -323,13 +352,14 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s output_component_file: Optional. Write a component definition to a local file. Can be used for sharing. extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature. modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured. The actual algorithm: Starting with the initial function, start traversing dependencies. If the dependecy.__module__ is in the modules_to_capture list then it's captured and it's dependencies are traversed. Otherwise the dependency is only referenced instead of capturing and its dependencies are not traversed. + pickle_code: Specifies whether the function code should be captured using pickling as opposed to source code manipulation. Pickling has better support for capturing dependencies, but is sensitive to version mismatch between python in component creation environment and runtime image. Returns: A factory function with a strongly-typed signature taken from the python function. Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp) that can run the original function in a container. ''' - component_spec = _func_to_component_spec(func, extra_code, base_image, modules_to_capture) + component_spec = _func_to_component_spec(func, extra_code, base_image, modules_to_capture, pickle_code) output_component_file = output_component_file or getattr(func, '_component_target_component_file', None) if output_component_file: diff --git a/sdk/python/tests/components/test_python_op.py b/sdk/python/tests/components/test_python_op.py index 2d833343b880..4fab78629fc3 100644 --- a/sdk/python/tests/components/test_python_op.py +++ b/sdk/python/tests/components/test_python_op.py @@ -126,7 +126,7 @@ def main_func(a: float, b: float) -> float: return ExtraClass().class_method(a) + extra_func(b) func = main_func - op = comp.func_to_container_op(func, output_component_file='comp.yaml') + op = comp.func_to_container_op(func, pickle_code=True) self.helper_test_2_in_1_out_component_using_local_call(func, op) @@ -146,27 +146,27 @@ def main_func(a: float, b: float) -> float: raise AssertionError("f2 should not be captured, because it's not a dependency.") expected_func = lambda a, b: a + b - op = comp.func_to_container_op(main_func) + op = comp.func_to_container_op(main_func, pickle_code=True) self.helper_test_2_in_1_out_component_using_local_call(expected_func, op) def test_func_to_container_op_call_other_func_global(self): func = module_func_with_deps - op = comp.func_to_container_op(func, output_component_file='comp.yaml') + op = comp.func_to_container_op(func, pickle_code=True) self.helper_test_2_in_1_out_component_using_local_call(func, op) def test_func_to_container_op_with_imported_func(self): from .test_data.module1 import module_func_with_deps as module1_func_with_deps func = module1_func_with_deps - op = comp.func_to_container_op(func) + op = comp.func_to_container_op(func, pickle_code=True) self.helper_test_2_in_1_out_component_using_local_call(func, op) def test_func_to_container_op_with_imported_func2(self): from .test_data.module2_which_depends_on_module1 import module2_func_with_deps as module2_func_with_deps func = module2_func_with_deps - op = comp.func_to_container_op(func, modules_to_capture=[ + op = comp.func_to_container_op(func, pickle_code=True, modules_to_capture=[ 'tests.components.test_data.module1', 'tests.components.test_data.module2_which_depends_on_module1' ])