Skip to content

Commit

Permalink
SDK/Lightweight - Disabled code pickling by default
Browse files Browse the repository at this point in the history
I've introduced code pickling to capture dependencies in kubeflow#1372
Later I've discovered that there is a serious opcode incompatibility between python versions 3.5 and 3.6+. See my analysis of the issue: cloudpipe/cloudpickle#293

Dues to this issue I decided to switch back to using source code copying by default and to continue improving it.

Until we stop supporting python 3.5 (kubeflow#668) it's too dangerous to use code pickling by default.

Code pickling can be enabled by specifying `pickle_code=True` when calling `func_to_container_op`
  • Loading branch information
Ark-kun committed Jun 18, 2019
1 parent 8938669 commit ea0b2da
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 17 deletions.
54 changes: 42 additions & 12 deletions sdk/python/kfp/components/_python_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,38 @@ def _capture_function_code_using_cloudpickle(func, modules_to_capture: List[str]
return '\n'.join(code_lines)


def _func_to_component_spec(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None) -> ComponentSpec:
def _capture_function_code_using_source_copy(func) -> str:
import inspect

#Source code can include decorators line @python_op. Remove them
(func_code_lines, _) = inspect.getsourcelines(func)
while func_code_lines[0].lstrip().startswith('@'): #decorator
del func_code_lines[0]

#Function might be defined in some indented scope (e.g. in another function).
#We need to handle this and properly dedent the function source code
first_line = func_code_lines[0]
indent = len(first_line) - len(first_line.lstrip())
func_code_lines = [line[indent:] for line in func_code_lines]

#TODO: Add support for copying the NamedTuple subclass declaration code
#Adding NamedTuple import if needed
if hasattr(inspect.signature(func).return_annotation, '_fields'): #NamedTuple
func_code_lines.insert(0, '\n')
func_code_lines.insert(0, 'from typing import NamedTuple\n')

return ''.join(func_code_lines) #Lines retain their \n endings


def _func_to_component_spec(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None, pickle_code=False) -> ComponentSpec:
'''Takes a self-contained python function and converts it to component
Args:
func: Required. The function to be converted
base_image: Optional. Docker image to be used as a base image for the python component. Must have python 3.5+ installed. Default is tensorflow/tensorflow:1.11.0-py3
Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised.
extra_code: Optional. Python source code that gets placed before the function code. Can be used as workaround to define types used in function signature.
pickle_code: Specifies whether the function code should be captured using pickling as opposed to source code manipulation. Pickling has better support for capturing dependencies, but is sensitive to version mismatch between python in component creation environment and runtime image.
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured.
'''
decorator_base_image = getattr(func, '_component_base_image', None)
Expand Down Expand Up @@ -168,7 +192,10 @@ def annotation_to_type_struct(annotation):

func_name=func.__name__

func_code = _capture_function_code_using_cloudpickle(func, modules_to_capture)
if pickle_code:
func_code = _capture_function_code_using_cloudpickle(func, modules_to_capture)
else:
func_code = _capture_function_code_using_source_copy(func)

extra_output_external_names = [name + '_file' for name in extra_output_names]

Expand Down Expand Up @@ -245,11 +272,11 @@ def annotation_to_type_struct(annotation):
return component_spec


def _func_to_component_dict(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None):
return _func_to_component_spec(func, extra_code, base_image, modules_to_capture).to_dict()
def _func_to_component_dict(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None, pickle_code=False):
return _func_to_component_spec(func, extra_code, base_image, modules_to_capture, pickle_code).to_dict()


def func_to_component_text(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None):
def func_to_component_text(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None, pickle_code=False):
'''
Converts a Python function to a component definition and returns its textual representation
Expand All @@ -268,15 +295,16 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s
Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised.
extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature.
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured. The actual algorithm: Starting with the initial function, start traversing dependencies. If the dependecy.__module__ is in the modules_to_capture list then it's captured and it's dependencies are traversed. Otherwise the dependency is only referenced instead of capturing and its dependencies are not traversed.
pickle_code: Specifies whether the function code should be captured using pickling as opposed to source code manipulation. Pickling has better support for capturing dependencies, but is sensitive to version mismatch between python in component creation environment and runtime image.
Returns:
Textual representation of a component definition
'''
component_dict = _func_to_component_dict(func, extra_code, base_image, modules_to_capture)
component_dict = _func_to_component_dict(func, extra_code, base_image, modules_to_capture, pickle_code)
return dump_yaml(component_dict)


def func_to_component_file(func, output_component_file, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None) -> None:
def func_to_component_file(func, output_component_file, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None, pickle_code=False) -> None:
'''
Converts a Python function to a component definition and writes it to a file
Expand All @@ -296,14 +324,15 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s
Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised.
extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature.
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured. The actual algorithm: Starting with the initial function, start traversing dependencies. If the dependecy.__module__ is in the modules_to_capture list then it's captured and it's dependencies are traversed. Otherwise the dependency is only referenced instead of capturing and its dependencies are not traversed.
pickle_code: Specifies whether the function code should be captured using pickling as opposed to source code manipulation. Pickling has better support for capturing dependencies, but is sensitive to version mismatch between python in component creation environment and runtime image.
'''

component_yaml = func_to_component_text(func, extra_code, base_image, modules_to_capture)

component_yaml = func_to_component_text(func, extra_code, base_image, modules_to_capture, pickle_code)
Path(output_component_file).write_text(component_yaml)


def func_to_container_op(func, output_component_file=None, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None):
def func_to_container_op(func, output_component_file=None, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None, pickle_code=False):
'''
Converts a Python function to a component and returns a task (ContainerOp) factory
Expand All @@ -323,13 +352,14 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s
output_component_file: Optional. Write a component definition to a local file. Can be used for sharing.
extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature.
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured. The actual algorithm: Starting with the initial function, start traversing dependencies. If the dependecy.__module__ is in the modules_to_capture list then it's captured and it's dependencies are traversed. Otherwise the dependency is only referenced instead of capturing and its dependencies are not traversed.
pickle_code: Specifies whether the function code should be captured using pickling as opposed to source code manipulation. Pickling has better support for capturing dependencies, but is sensitive to version mismatch between python in component creation environment and runtime image.
Returns:
A factory function with a strongly-typed signature taken from the python function.
Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp) that can run the original function in a container.
'''

component_spec = _func_to_component_spec(func, extra_code, base_image, modules_to_capture)
component_spec = _func_to_component_spec(func, extra_code, base_image, modules_to_capture, pickle_code)

output_component_file = output_component_file or getattr(func, '_component_target_component_file', None)
if output_component_file:
Expand Down
10 changes: 5 additions & 5 deletions sdk/python/tests/components/test_python_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def main_func(a: float, b: float) -> float:
return ExtraClass().class_method(a) + extra_func(b)

func = main_func
op = comp.func_to_container_op(func, output_component_file='comp.yaml')
op = comp.func_to_container_op(func, pickle_code=True)

self.helper_test_2_in_1_out_component_using_local_call(func, op)

Expand All @@ -146,27 +146,27 @@ def main_func(a: float, b: float) -> float:
raise AssertionError("f2 should not be captured, because it's not a dependency.")

expected_func = lambda a, b: a + b
op = comp.func_to_container_op(main_func)
op = comp.func_to_container_op(main_func, pickle_code=True)

self.helper_test_2_in_1_out_component_using_local_call(expected_func, op)

def test_func_to_container_op_call_other_func_global(self):
func = module_func_with_deps
op = comp.func_to_container_op(func, output_component_file='comp.yaml')
op = comp.func_to_container_op(func, pickle_code=True)

self.helper_test_2_in_1_out_component_using_local_call(func, op)

def test_func_to_container_op_with_imported_func(self):
from .test_data.module1 import module_func_with_deps as module1_func_with_deps
func = module1_func_with_deps
op = comp.func_to_container_op(func)
op = comp.func_to_container_op(func, pickle_code=True)

self.helper_test_2_in_1_out_component_using_local_call(func, op)

def test_func_to_container_op_with_imported_func2(self):
from .test_data.module2_which_depends_on_module1 import module2_func_with_deps as module2_func_with_deps
func = module2_func_with_deps
op = comp.func_to_container_op(func, modules_to_capture=[
op = comp.func_to_container_op(func, pickle_code=True, modules_to_capture=[
'tests.components.test_data.module1',
'tests.components.test_data.module2_which_depends_on_module1'
])
Expand Down

0 comments on commit ea0b2da

Please sign in to comment.