Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SDK/Lightweight - Disabled code pickling by default #1512

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 42 additions & 12 deletions sdk/python/kfp/components/_python_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,29 @@ def _capture_function_code_using_cloudpickle(func, modules_to_capture: List[str]
return '\n'.join(code_lines)


def _capture_function_code_using_source_copy(func) -> str:
    '''Captures the code of a function by copying and cleaning up its source text.

    The source lines are obtained with inspect.getsourcelines, any leading
    decorator lines are removed, and the remaining lines are dedented so that
    the "def" line starts at column 0.

    Args:
        func: The function whose source code should be captured.

    Returns:
        The dedented function source code. If the return annotation of the
        function has a "_fields" attribute (NamedTuple), the code is preceded
        by a "from typing import NamedTuple" line.

    Raises:
        ValueError: If no function definition line can be found in the source.
    '''
    import inspect

    (source_lines, _) = inspect.getsourcelines(func)

    #Source code can include decorators like @python_op. Remove them.
    #A decorator call can span several lines, so instead of only dropping lines
    #starting with "@", everything before the function definition line is dropped.
    definition_prefixes = ('def ', 'async def ')
    def_index = next(
        (
            index for index, line in enumerate(source_lines)
            if line.lstrip().startswith(definition_prefixes)
        ),
        None,
    )
    if def_index is None:
        raise ValueError(
            'Failed to find the definition line in the source code of function "{}".'.format(
                getattr(func, '__name__', func)
            )
        )
    func_code_lines = source_lines[def_index:]

    #Function might be defined in some indented scope (e.g. in another function).
    #We need to handle this and properly dedent the function source code
    first_line = func_code_lines[0]
    indent = len(first_line) - len(first_line.lstrip())
    #Only spaces and tabs are stripped from the first `indent` characters.
    #Blank lines keep their line ending and lines that are indented less than
    #the definition line (e.g. inside a multi-line string) keep all their text.
    func_code_lines = [
        line[:indent].lstrip(' \t') + line[indent:]
        for line in func_code_lines
    ]

    #TODO: Add support for copying the NamedTuple subclass declaration code
    #Adding NamedTuple import if needed
    if hasattr(inspect.signature(func).return_annotation, '_fields'): #NamedTuple
        func_code_lines = ['from typing import NamedTuple\n', '\n'] + func_code_lines

    return ''.join(func_code_lines) #Lines retain their \n endings


def _extract_component_interface(func) -> ComponentSpec:
single_output_name_const = 'Output'

Expand Down Expand Up @@ -150,7 +173,7 @@ def annotation_to_type_struct(annotation):
return component_spec


def _func_to_component_spec(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None) -> ComponentSpec:
def _func_to_component_spec(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None, use_code_pickling=False) -> ComponentSpec:
'''Takes a self-contained python function and converts it to component

Args:
Expand All @@ -159,6 +182,7 @@ def _func_to_component_spec(func, extra_code='', base_image=_default_base_image,
Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised.
extra_code: Optional. Python source code that gets placed before the function code. Can be used as workaround to define types used in function signature.
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured.
use_code_pickling: Specifies whether the function code should be captured using pickling as opposed to source code manipulation. Pickling has better support for capturing dependencies, but is sensitive to version mismatch between python in component creation environment and runtime image.
'''
decorator_base_image = getattr(func, '_component_base_image', None)
if decorator_base_image is not None:
Expand All @@ -176,7 +200,10 @@ def _func_to_component_spec(func, extra_code='', base_image=_default_base_image,
arguments.extend(InputValuePlaceholder(input.name) for input in component_spec.inputs)
arguments.extend(OutputPathPlaceholder(output.name) for output in component_spec.outputs)

func_code = _capture_function_code_using_cloudpickle(func, modules_to_capture)
if use_code_pickling:
func_code = _capture_function_code_using_cloudpickle(func, modules_to_capture)
else:
func_code = _capture_function_code_using_source_copy(func)

extra_output_names = [output.name for output in component_spec.outputs]
extra_output_external_names = [name + '_file' for name in extra_output_names]
Expand Down Expand Up @@ -245,11 +272,11 @@ def _func_to_component_spec(func, extra_code='', base_image=_default_base_image,
return component_spec


def _func_to_component_dict(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None):
return _func_to_component_spec(func, extra_code, base_image, modules_to_capture).to_dict()
def _func_to_component_dict(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None, use_code_pickling=False):
return _func_to_component_spec(func, extra_code, base_image, modules_to_capture, use_code_pickling).to_dict()


def func_to_component_text(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None):
def func_to_component_text(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None, use_code_pickling=False):
'''
Converts a Python function to a component definition and returns its textual representation

Expand All @@ -268,15 +295,16 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s
Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised.
extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature.
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured. The actual algorithm: Starting with the initial function, start traversing dependencies. If the dependency.__module__ is in the modules_to_capture list then it's captured and its dependencies are traversed. Otherwise the dependency is only referenced instead of capturing and its dependencies are not traversed.

use_code_pickling: Specifies whether the function code should be captured using pickling as opposed to source code manipulation. Pickling has better support for capturing dependencies, but is sensitive to version mismatch between python in component creation environment and runtime image.

Returns:
Textual representation of a component definition
'''
component_dict = _func_to_component_dict(func, extra_code, base_image, modules_to_capture)
component_dict = _func_to_component_dict(func, extra_code, base_image, modules_to_capture, use_code_pickling)
return dump_yaml(component_dict)


def func_to_component_file(func, output_component_file, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None) -> None:
def func_to_component_file(func, output_component_file, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None, use_code_pickling=False) -> None:
'''
Converts a Python function to a component definition and writes it to a file

Expand All @@ -296,14 +324,15 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s
Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised.
extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature.
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured. The actual algorithm: Starting with the initial function, start traversing dependencies. If the dependency.__module__ is in the modules_to_capture list then it's captured and its dependencies are traversed. Otherwise the dependency is only referenced instead of capturing and its dependencies are not traversed.
use_code_pickling: Specifies whether the function code should be captured using pickling as opposed to source code manipulation. Pickling has better support for capturing dependencies, but is sensitive to version mismatch between python in component creation environment and runtime image.
'''

component_yaml = func_to_component_text(func, extra_code, base_image, modules_to_capture)

component_yaml = func_to_component_text(func, extra_code, base_image, modules_to_capture, use_code_pickling)
Path(output_component_file).write_text(component_yaml)


def func_to_container_op(func, output_component_file=None, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None):
def func_to_container_op(func, output_component_file=None, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None, use_code_pickling=False):
'''
Converts a Python function to a component and returns a task (ContainerOp) factory

Expand All @@ -323,13 +352,14 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s
output_component_file: Optional. Write a component definition to a local file. Can be used for sharing.
extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature.
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured. The actual algorithm: Starting with the initial function, start traversing dependencies. If the dependency.__module__ is in the modules_to_capture list then it's captured and its dependencies are traversed. Otherwise the dependency is only referenced instead of capturing and its dependencies are not traversed.
use_code_pickling: Specifies whether the function code should be captured using pickling as opposed to source code manipulation. Pickling has better support for capturing dependencies, but is sensitive to version mismatch between python in component creation environment and runtime image.

Returns:
A factory function with a strongly-typed signature taken from the python function.
Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp) that can run the original function in a container.
'''

component_spec = _func_to_component_spec(func, extra_code, base_image, modules_to_capture)
component_spec = _func_to_component_spec(func, extra_code, base_image, modules_to_capture, use_code_pickling)

output_component_file = output_component_file or getattr(func, '_component_target_component_file', None)
if output_component_file:
Expand Down
10 changes: 5 additions & 5 deletions sdk/python/tests/components/test_python_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def main_func(a: float, b: float) -> float:
return ExtraClass().class_method(a) + extra_func(b)

func = main_func
op = comp.func_to_container_op(func, output_component_file='comp.yaml')
op = comp.func_to_container_op(func, use_code_pickling=True)

self.helper_test_2_in_1_out_component_using_local_call(func, op)

Expand All @@ -146,27 +146,27 @@ def main_func(a: float, b: float) -> float:
raise AssertionError("f2 should not be captured, because it's not a dependency.")

expected_func = lambda a, b: a + b
op = comp.func_to_container_op(main_func)
op = comp.func_to_container_op(main_func, use_code_pickling=True)

self.helper_test_2_in_1_out_component_using_local_call(expected_func, op)

def test_func_to_container_op_call_other_func_global(self):
func = module_func_with_deps
op = comp.func_to_container_op(func, output_component_file='comp.yaml')
op = comp.func_to_container_op(func, use_code_pickling=True)

self.helper_test_2_in_1_out_component_using_local_call(func, op)

def test_func_to_container_op_with_imported_func(self):
from .test_data.module1 import module_func_with_deps as module1_func_with_deps
func = module1_func_with_deps
op = comp.func_to_container_op(func)
op = comp.func_to_container_op(func, use_code_pickling=True)

self.helper_test_2_in_1_out_component_using_local_call(func, op)

def test_func_to_container_op_with_imported_func2(self):
from .test_data.module2_which_depends_on_module1 import module2_func_with_deps as module2_func_with_deps
func = module2_func_with_deps
op = comp.func_to_container_op(func, modules_to_capture=[
op = comp.func_to_container_op(func, use_code_pickling=True, modules_to_capture=[
'tests.components.test_data.module1',
'tests.components.test_data.module2_which_depends_on_module1'
])
Expand Down