Skip to content

Commit

Permalink
SDK - Lightweight - Convert the names of file inputs and outputs
Browse files Browse the repository at this point in the history
Removing the "_path" and "_file" suffixes from the names of file inputs and outputs.
Problem: When accepting file inputs (outputs), the function inside the component receives file paths (or file streams), so it's natural to call the function parameter "something_file_path" (e.g. model_file_path or number_file_path).
But from the outside perspective, there are no files or paths - the actual data objects (or references to them) are passed in.
It looks very strange when argument passing code looks like this: `component(number_file_path=42)`. This looks like an error since 42 is not a path. It's not even a string.
It's much more natural to strip the names of file inputs and outputs of "_file" or "_path" suffixes. Then the argument passing code will look natural: "component(number=42)"
  • Loading branch information
Ark-kun committed Sep 28, 2019
1 parent 33bbe54 commit b3003bd
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 18 deletions.
39 changes: 33 additions & 6 deletions sdk/python/kfp/components/_python_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from ._yaml_utils import dump_yaml
from ._components import _create_task_factory_from_component_spec
from ._data_passing import serialize_value, type_name_to_deserializer, type_name_to_serializer, type_to_type_name
from ._naming import _make_name_unique_by_adding_index
from ._structures import *

import inspect
Expand All @@ -38,6 +39,9 @@
T = TypeVar('T')


_FEATURE_STRIP_FILE_IO_NAME_PARTS = True


# InputPath(list) or InputPath('JsonObject')

class InputPath:
Expand Down Expand Up @@ -232,35 +236,53 @@ def annotation_to_type_struct(annotation):
type_name = type_to_type_name[type_name]
return type_name

input_names = set()
output_names = set()
for parameter in parameters:
parameter_annotation = parameter.annotation
passing_style = None
io_name = parameter.name
if isinstance(parameter_annotation, (InputPath, InputTextFile, InputBinaryFile, OutputPath, OutputTextFile, OutputBinaryFile)):
passing_style = type(parameter_annotation)
parameter_annotation = parameter_annotation.type
if parameter.default is not inspect.Parameter.empty:
raise ValueError('Default values for file inputs/outputs are not supported. If you need them for some reason, please create an issue and write about your usage scenario.')
# TODO: Fix the input names: "number_file_path" parameter should be exposed as "number" input
if _FEATURE_STRIP_FILE_IO_NAME_PARTS:
# Removing the "_path" and "_file" suffixes from the input/output names as the argument passed to the component needs to be the data itself, not local file path.
# Problem: When accepting file inputs (outputs), the function inside the component receives file paths (or file streams), so it's natural to call the function parameter "something_file_path" (e.g. model_file_path or number_file_path).
# But from the outside perspective, there are no files or paths - the actual data objects (or references to them) are passed in.
# It looks very strange when argument passing code looks like this: `component(number_file_path=42)`. This looks like an error since 42 is not a path. It's not even a string.
# It's much more natural to strip the names of file inputs and outputs of "_file" or "_path" suffixes. Then the argument passing code will look natural: "component(number=42)".
if isinstance(parameter.annotation, (InputPath, OutputPath)) and io_name.endswith('_path'):
io_name = io_name[0:-len('_path')]
if io_name.endswith('_file'):
io_name = io_name[0:-len('_file')]
type_struct = annotation_to_type_struct(parameter_annotation)
#TODO: Humanize the input/output names

if isinstance(parameter.annotation, (OutputPath, OutputTextFile, OutputBinaryFile)):
io_name = _make_name_unique_by_adding_index(io_name, output_names, '_')
output_names.add(io_name)
output_spec = OutputSpec(
name=parameter.name,
name=io_name,
type=type_struct,
)
output_spec._passing_style = passing_style
output_spec._parameter_name = parameter.name
outputs.append(output_spec)
else:
io_name = _make_name_unique_by_adding_index(io_name, input_names, '_')
input_names.add(io_name)
input_spec = InputSpec(
name=parameter.name,
name=io_name,
type=type_struct,
)
if parameter.default is not inspect.Parameter.empty:
input_spec.optional = True
if parameter.default is not None:
input_spec.default = serialize_value(parameter.default, type_struct)
input_spec._passing_style = passing_style
input_spec._parameter_name = parameter.name
inputs.append(input_spec)

#Analyzing the return type annotations.
Expand All @@ -271,16 +293,21 @@ def annotation_to_type_struct(annotation):
if hasattr(return_ann, '_field_types'):
type_struct = annotation_to_type_struct(return_ann._field_types.get(field_name, None))

output_name = _make_name_unique_by_adding_index(field_name, output_names, '_')
output_names.add(output_name)
output_spec = OutputSpec(
name=field_name,
name=output_name,
type=type_struct,
)
output_spec._passing_style = None
output_spec._return_tuple_field_name = field_name
outputs.append(output_spec)
elif signature.return_annotation is not None and signature.return_annotation != inspect.Parameter.empty:
output_name = _make_name_unique_by_adding_index(single_output_name_const, output_names, '_') # Fixes exotic, but possible collision: `def func(output_path: OutputPath()) -> str: ...`
output_names.add(output_name)
type_struct = annotation_to_type_struct(signature.return_annotation)
output_spec = OutputSpec(
name=single_output_name_const,
name=output_name,
type=type_struct,
)
output_spec._passing_style = None
Expand Down Expand Up @@ -398,7 +425,7 @@ def get_serializer_and_register_definitions(type_name) -> str:
is_required = isinstance(input, OutputSpec) or not input.optional
line = '_parser.add_argument("{param_flag}", dest="{param_var}", type={param_type}, required={is_required}, default=argparse.SUPPRESS)'.format(
param_flag=param_flag,
param_var=input.name,
param_var=input._parameter_name, # Not input.name, since the inputs could have been renamed
param_type=get_argparse_type_for_input_file(input._passing_style) or get_deserializer_and_register_definitions(input.type),
is_required=str(is_required),
)
Expand Down
116 changes: 104 additions & 12 deletions sdk/python/tests/components/test_python_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,8 +541,7 @@ def consume_file_path(number_file_path: InputPath(int)) -> int:

self.assertEqual(task_factory.component_spec.inputs[0].type, 'Integer')

# TODO: Fix the input names: "number_file_path" parameter should be exposed as "number" input
self.helper_test_component_using_local_call(task_factory, arguments={'number_file_path': "42"}, expected_output_values={'output': '42'})
self.helper_test_component_using_local_call(task_factory, arguments={'number': "42"}, expected_output_values={'output': '42'})


def test_input_text_file(self):
Expand All @@ -556,8 +555,7 @@ def consume_file_path(number_file: InputTextFile(int)) -> int:

self.assertEqual(task_factory.component_spec.inputs[0].type, 'Integer')

# TODO: Fix the input names: "number_file" parameter should be exposed as "number" input
self.helper_test_component_using_local_call(task_factory, arguments={'number_file': "42"}, expected_output_values={'output': '42'})
self.helper_test_component_using_local_call(task_factory, arguments={'number': "42"}, expected_output_values={'output': '42'})


def test_input_binary_file(self):
Expand All @@ -571,8 +569,7 @@ def consume_file_path(number_file: InputBinaryFile(int)) -> int:

self.assertEqual(task_factory.component_spec.inputs[0].type, 'Integer')

# TODO: Fix the input names: "number_file" parameter should be exposed as "number" input
self.helper_test_component_using_local_call(task_factory, arguments={'number_file': "42"}, expected_output_values={'output': '42'})
self.helper_test_component_using_local_call(task_factory, arguments={'number': "42"}, expected_output_values={'output': '42'})


def test_output_path(self):
Expand All @@ -587,8 +584,7 @@ def write_to_file_path(number_file_path: OutputPath(int)):
self.assertEqual(len(task_factory.component_spec.outputs), 1)
self.assertEqual(task_factory.component_spec.outputs[0].type, 'Integer')

# TODO: Fix the output names: "number_file_path" should be exposed as "number" output
self.helper_test_component_using_local_call(task_factory, arguments={}, expected_output_values={'number_file_path': '42'})
self.helper_test_component_using_local_call(task_factory, arguments={}, expected_output_values={'number': '42'})


def test_output_text_file(self):
Expand All @@ -602,8 +598,7 @@ def write_to_file_path(number_file: OutputTextFile(int)):
self.assertEqual(len(task_factory.component_spec.outputs), 1)
self.assertEqual(task_factory.component_spec.outputs[0].type, 'Integer')

# TODO: Fix the output names: "number_file" should be exposed as "number" output
self.helper_test_component_using_local_call(task_factory, arguments={}, expected_output_values={'number_file': '42'})
self.helper_test_component_using_local_call(task_factory, arguments={}, expected_output_values={'number': '42'})


def test_output_binary_file(self):
Expand All @@ -617,8 +612,105 @@ def write_to_file_path(number_file: OutputBinaryFile(int)):
self.assertEqual(len(task_factory.component_spec.outputs), 1)
self.assertEqual(task_factory.component_spec.outputs[0].type, 'Integer')

# TODO: Fix the output names: "number_file" should be exposed as "number" output
self.helper_test_component_using_local_call(task_factory, arguments={}, expected_output_values={'number_file': '42'})
self.helper_test_component_using_local_call(task_factory, arguments={}, expected_output_values={'number': '42'})


def test_file_input_name_conversion(self):
# Checking the input name conversion rules for file inputs:
# For InputPath, the "_path" suffix is removed
# For Input*, the "_file" suffix is removed

from kfp.components import InputPath, InputTextFile, InputBinaryFile, OutputPath, OutputTextFile, OutputBinaryFile
def consume_file_path(
number: int,

number_1a_path: str,
number_1b_file: str,
number_1c_file_path: str,
number_1d_path_file: str,

number_2a_path: InputPath(str),
number_2b_file: InputPath(str),
number_2c_file_path: InputPath(str),
number_2d_path_file: InputPath(str),

number_3a_path: InputTextFile(str),
number_3b_file: InputTextFile(str),
number_3c_file_path: InputTextFile(str),
number_3d_path_file: InputTextFile(str),

number_4a_path: InputBinaryFile(str),
number_4b_file: InputBinaryFile(str),
number_4c_file_path: InputBinaryFile(str),
number_4d_path_file: InputBinaryFile(str),

output_number_2a_path: OutputPath(str),
output_number_2b_file: OutputPath(str),
output_number_2c_file_path: OutputPath(str),
output_number_2d_path_file: OutputPath(str),

output_number_3a_path: OutputTextFile(str),
output_number_3b_file: OutputTextFile(str),
output_number_3c_file_path: OutputTextFile(str),
output_number_3d_path_file: OutputTextFile(str),

output_number_4a_path: OutputBinaryFile(str),
output_number_4b_file: OutputBinaryFile(str),
output_number_4c_file_path: OutputBinaryFile(str),
output_number_4d_path_file: OutputBinaryFile(str),
):
pass

task_factory = comp.func_to_container_op(consume_file_path)
actual_input_names = [input.name for input in task_factory.component_spec.inputs]
actual_output_names = [output.name for output in task_factory.component_spec.outputs]

self.assertEqual(
[
'number',

'number_1a_path',
'number_1b_file',
'number_1c_file_path',
'number_1d_path_file',

'number_2a',
'number_2b',
'number_2c',
'number_2d_path',

'number_3a_path',
'number_3b',
'number_3c_file_path',
'number_3d_path',

'number_4a_path',
'number_4b',
'number_4c_file_path',
'number_4d_path',
],
actual_input_names
)

self.assertEqual(
[
'output_number_2a',
'output_number_2b',
'output_number_2c',
'output_number_2d_path',

'output_number_3a_path',
'output_number_3b',
'output_number_3c_file_path',
'output_number_3d_path',

'output_number_4a_path',
'output_number_4b',
'output_number_4c_file_path',
'output_number_4d_path',
],
actual_output_names
)


def test_end_to_end_python_component_pipeline_compilation(self):
Expand Down

0 comments on commit b3003bd

Please sign in to comment.