From cf3d8347534411d098e0a5bfbd92a350d564cfb3 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Thu, 30 May 2019 13:51:47 -0700 Subject: [PATCH 1/7] add default value type checking --- sdk/python/kfp/dsl/_metadata.py | 16 +++- sdk/python/kfp/dsl/types.py | 81 ++++++++++++--------- sdk/python/tests/compiler/compiler_tests.py | 36 ++++++++- 3 files changed, 95 insertions(+), 38 deletions(-) diff --git a/sdk/python/kfp/dsl/_metadata.py b/sdk/python/kfp/dsl/_metadata.py index 168a528b356..24a5d5d7186 100644 --- a/sdk/python/kfp/dsl/_metadata.py +++ b/sdk/python/kfp/dsl/_metadata.py @@ -70,7 +70,10 @@ def serialize(self): @staticmethod def deserialize(payload): - # If the payload is a string of a dict serialization, convert it back to a dict + '''deserialize expects two types of input: dict and str + 1) If the payload is a string of a dict serialization, convert it back to a dict + 2) If the payload is a string, the type is named as such with no properties. + 3) If the payload is a dict, the type name and properties are extracted. ''' try: import ast payload = ast.literal_eval(payload) @@ -138,6 +141,9 @@ def _annotation_to_typemeta(annotation): '''_annotation_to_type_meta converts an annotation to an instance of TypeMeta Args: annotation(BaseType/str/dict): input/output annotations + BaseType: registered in kfp.dsl.types + str: either a string of a dict serialization or a string of the type name + dict: type name and properties. note that the properties values can be dict. Returns: TypeMeta ''' @@ -221,6 +227,14 @@ def _extract_pipeline_metadata(func): arg_default = arg_defaults[arg] if arg in arg_defaults else None if arg in annotations: arg_type = _annotation_to_typemeta(annotations[arg]) + if 'openapi_schema_validator' in arg_type.properties and arg_default is not None: + from jsonschema import validate + import json + schema_object = arg_type.properties['openapi_schema_validator'] + if isinstance(schema_object, str): + # In case the property value for the schema validator is a string instead of a dict. + schema_object = json.loads(arg_type.properties['openapi_schema_validator']) + validate(instance=arg_default, schema=schema_object) pipeline_meta.inputs.append(ParameterMeta(name=arg, description='', param_type=arg_type, default=arg_default)) #TODO: add descriptions to the metadata diff --git a/sdk/python/kfp/dsl/types.py b/sdk/python/kfp/dsl/types.py index c142cf35742..63ce4ca25fe 100644 --- a/sdk/python/kfp/dsl/types.py +++ b/sdk/python/kfp/dsl/types.py @@ -19,65 +19,76 @@ class BaseType: # Primitive Types class Integer(BaseType): - openapi_schema_validator = { - "type": "integer" - } + def __init__(self): + self.openapi_schema_validator = { + "type": "integer" + } class String(BaseType): - openapi_schema_validator = { - "type": "string" - } + def __init__(self): + self.openapi_schema_validator = { + "type": "string" + } class Float(BaseType): - openapi_schema_validator = { - "type": "number" - } + def __init__(self): + self.openapi_schema_validator = { + "type": "number" + } class Bool(BaseType): - openapi_schema_validator = { - "type": "boolean" - } + def __init__(self): + self.openapi_schema_validator = { + "type": "boolean" + } class List(BaseType): - openapi_schema_validator = { - "type": "array" - } + def __init__(self): + self.openapi_schema_validator = { + "type": "array" + } class Dict(BaseType): - openapi_schema_validator = { - "type": "object", - } + def __init__(self): + self.openapi_schema_validator = { + "type": "object", + } # GCP Types class GCSPath(BaseType): - openapi_schema_validator = { - "type": "string", - "pattern": "^gs://.*$" - } + def __init__(self): + self.openapi_schema_validator = { + "type": "string", + "pattern": "^gs://.*$" + } class GCRPath(BaseType): - openapi_schema_validator = { - "type": "string", - "pattern": "^.*gcr\\.io/.*$" - } + def __init__(self): + self.openapi_schema_validator = { + "type": "string", + "pattern": "^.*gcr\\.io/.*$" + } class GCPRegion(BaseType): - openapi_schema_validator = { - "type": "string" - } + def __init__(self): + self.openapi_schema_validator = { + "type": "string" + } class GCPProjectID(BaseType): '''MetaGCPProjectID: GCP project id''' - openapi_schema_validator = { - "type": "string" - } + def __init__(self): + self.openapi_schema_validator = { + "type": "string" + } # General Types class LocalPath(BaseType): #TODO: add restriction to path - openapi_schema_validator = { - "type": "string" - } + def __init__(self): + self.openapi_schema_validator = { + "type": "string" + } class InconsistentTypeException(Exception): '''InconsistencyTypeException is raised when two types are not consistent''' diff --git a/sdk/python/tests/compiler/compiler_tests.py b/sdk/python/tests/compiler/compiler_tests.py index 8e4ff283a7e..0bd56fd94da 100644 --- a/sdk/python/tests/compiler/compiler_tests.py +++ b/sdk/python/tests/compiler/compiler_tests.py @@ -364,7 +364,7 @@ def test_py_param_substitutions(self): def test_type_checking_with_consistent_types(self): """Test type check pipeline parameters against component metadata.""" @component - def a_op(field_m: {'GCSPath': {'path_type': 'file', 'file_type':'tsv'}}, field_o: 'Integer'): + def a_op(field_m: {'GCSPath': {'path_type': 'file', 'file_type':'tsv'}}, field_o: Integer()): return ContainerOp( name = 'operator a', image = 'gcr.io/ml-pipeline/component-b', @@ -394,7 +394,7 @@ def my_pipeline(a: {'GCSPath': {'path_type':'file', 'file_type': 'tsv'}}='good', def test_type_checking_with_inconsistent_types(self): """Test type check pipeline parameters against component metadata.""" @component - def a_op(field_m: {'GCSPath': {'path_type': 'file', 'file_type':'tsv'}}, field_o: 'Integer'): + def a_op(field_m: {'GCSPath': {'path_type': 'file', 'file_type':'tsv'}}, field_o: Integer()): return ContainerOp( name = 'operator a', image = 'gcr.io/ml-pipeline/component-b', @@ -423,6 +423,38 @@ def my_pipeline(a: {'GCSPath': {'path_type':'file', 'file_type': 'csv'}}='good', finally: shutil.rmtree(tmpdir) + def test_type_checking_with_json_schema(self): + """Test type check pipeline parameters against the json schema.""" + @component + def a_op(field_m: {'GCRPath': {'openapi_schema_validator': {"type": "string", "pattern": "^.*gcr\\.io/.*$"}}}, field_o: 'Integer'): + return ContainerOp( + name = 'operator a', + image = 'gcr.io/ml-pipeline/component-b', + arguments = [ + '--field-l', field_m, + '--field-o', field_o, + ], + ) + + @pipeline( + name='p1', + description='description1' + ) + def my_pipeline(a: {'GCRPath': {'openapi_schema_validator': {"type": "string", "pattern": "^.*gcr\\.io/.*$"}}}='good', b: 'Integer'=12): + a_op(field_m=a, field_o=b) + + test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata') + sys.path.append(test_data_dir) + tmpdir = tempfile.mkdtemp() + try: + simple_package_path = os.path.join(tmpdir, 'simple.tar.gz') + import jsonschema + with self.assertRaises(jsonschema.exceptions.ValidationError): + compiler.Compiler().compile(my_pipeline, simple_package_path, type_check=True) + + finally: + shutil.rmtree(tmpdir) + def test_compile_pipeline_with_after(self): def op(): return dsl.ContainerOp( From 76d2955e03ce96d4fa67203183631722b461d855 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Thu, 30 May 2019 13:55:58 -0700 Subject: [PATCH 2/7] add jsonschema dependency --- sdk/python/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/python/setup.py b/sdk/python/setup.py index 21b346a94bf..8ebb651541b 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -32,6 +32,7 @@ 'cloudpickle', 'kfp-server-api >= 0.1.18, < 0.1.19', #Update the upper version whenever a new version of the kfp-server-api package is released. Update the lower version when there is a breaking change in kfp-server-api. 'argo-models == 2.2.1a', #2.2.1a is equivalent to argo 2.2.1 + 'jsonschema >= 3.0.1' ] setup( From 9ef41cb6c10349fc621d04c925e0e31a2b18e139 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Fri, 31 May 2019 16:26:01 -0700 Subject: [PATCH 3/7] fix unit test error --- sdk/python/tests/dsl/component_tests.py | 4 ++-- sdk/python/tests/dsl/pipeline_tests.py | 2 +- sdk/python/tests/dsl/type_tests.py | 5 ++++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sdk/python/tests/dsl/component_tests.py b/sdk/python/tests/dsl/component_tests.py index c6e54c56b31..05c3b6cd75f 100644 --- a/sdk/python/tests/dsl/component_tests.py +++ b/sdk/python/tests/dsl/component_tests.py @@ -37,9 +37,9 @@ def componentA(a: {'ArtifactA': {'file_type': 'csv'}}, b: Integer() = 12, c: {'A golden_meta = ComponentMeta(name='componentA', description='') golden_meta.inputs.append(ParameterMeta(name='a', description='', param_type=TypeMeta(name='ArtifactA', properties={'file_type': 'csv'}))) - golden_meta.inputs.append(ParameterMeta(name='b', description='', param_type=TypeMeta(name='Integer'), default=12)) + golden_meta.inputs.append(ParameterMeta(name='b', description='', param_type=TypeMeta(name='Integer', properties={'openapi_schema_validator': {"type": "integer"}}), default=12)) golden_meta.inputs.append(ParameterMeta(name='c', description='', param_type=TypeMeta(name='ArtifactB', properties={'path_type':'file', 'file_type': 'tsv'}), default='gs://hello/world')) - golden_meta.outputs.append(ParameterMeta(name='model', description='', param_type=TypeMeta(name='Integer'))) + golden_meta.outputs.append(ParameterMeta(name='model', description='', param_type=TypeMeta(name='Integer', properties={'openapi_schema_validator': {"type": "integer"}}))) self.assertEqual(containerOp._metadata, golden_meta) diff --git a/sdk/python/tests/dsl/pipeline_tests.py b/sdk/python/tests/dsl/pipeline_tests.py index 3d916469fde..c8875bb2b47 100644 --- a/sdk/python/tests/dsl/pipeline_tests.py +++ b/sdk/python/tests/dsl/pipeline_tests.py @@ -71,7 +71,7 @@ def my_pipeline1(a: {'Schema': {'file_type': 'csv'}}='good', b: Integer()=12): golden_meta = PipelineMeta(name='p1', description='description1') golden_meta.inputs.append(ParameterMeta(name='a', description='', param_type=TypeMeta(name='Schema', properties={'file_type': 'csv'}), default='good')) - golden_meta.inputs.append(ParameterMeta(name='b', description='', param_type=TypeMeta(name='Integer'), default=12)) + golden_meta.inputs.append(ParameterMeta(name='b', description='', param_type=TypeMeta(name='Integer', properties={'openapi_schema_validator': {"type": "integer"}}), default=12)) pipeline_meta = _extract_pipeline_metadata(my_pipeline1) self.assertEqual(pipeline_meta, golden_meta) \ No newline at end of file diff --git a/sdk/python/tests/dsl/type_tests.py b/sdk/python/tests/dsl/type_tests.py index f53aceaaef8..dff328a553a 100644 --- a/sdk/python/tests/dsl/type_tests.py +++ b/sdk/python/tests/dsl/type_tests.py @@ -22,7 +22,10 @@ def test_class_to_dict(self): gcspath_dict = _instance_to_dict(GCSPath()) golden_dict = { 'GCSPath': { - + 'openapi_schema_validator': { + "type": "string", + "pattern": "^gs://.*$" + } } } self.assertEqual(golden_dict, gcspath_dict) From d6cfc2cd50bc3ccd7b13e1c0a5f38b3bfc58c840 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Fri, 31 May 2019 17:16:19 -0700 Subject: [PATCH 4/7] workaround for travis python package installation --- .travis.yml | 1 + sdk/python/setup.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8be6d31e406..9e54f1f0036 100644 --- a/.travis.yml +++ b/.travis.yml @@ -65,6 +65,7 @@ matrix: - language: python python: "3.6" env: TOXENV=py36 + install: pip3 install jsonschema==3.0.1 script: # DSL tests - cd $TRAVIS_BUILD_DIR/sdk/python diff --git a/sdk/python/setup.py b/sdk/python/setup.py index 8ebb651541b..21b346a94bf 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -32,7 +32,6 @@ 'cloudpickle', 'kfp-server-api >= 0.1.18, < 0.1.19', #Update the upper version whenever a new version of the kfp-server-api package is released. Update the lower version when there is a breaking change in kfp-server-api. 'argo-models == 2.2.1a', #2.2.1a is equivalent to argo 2.2.1 - 'jsonschema >= 3.0.1' ] setup( From dfc9bc1246ccf9c91bad6d17af1870d7c463a604 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Fri, 31 May 2019 17:50:05 -0700 Subject: [PATCH 5/7] add back jsonschema version --- sdk/python/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/python/setup.py b/sdk/python/setup.py index 21b346a94bf..8ebb651541b 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -32,6 +32,7 @@ 'cloudpickle', 'kfp-server-api >= 0.1.18, < 0.1.19', #Update the upper version whenever a new version of the kfp-server-api package is released. Update the lower version when there is a breaking change in kfp-server-api. 'argo-models == 2.2.1a', #2.2.1a is equivalent to argo 2.2.1 + 'jsonschema >= 3.0.1' ] setup( From 0496ea29de6541c082a0ec0b1722c830ab09e20d Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Mon, 3 Jun 2019 13:17:59 -0700 Subject: [PATCH 6/7] fix sample test error in type checking sample --- .../notebooks/DSL Static Type Checking.ipynb | 39 ++++++++++++------- ...ghtweight Python components - basics.ipynb | 31 ++++++++++----- 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/samples/notebooks/DSL Static Type Checking.ipynb b/samples/notebooks/DSL Static Type Checking.ipynb index caa2878e1ed..f50e957d530 100644 --- a/samples/notebooks/DSL Static Type Checking.ipynb +++ b/samples/notebooks/DSL Static Type Checking.ipynb @@ -195,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -255,7 +255,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -393,7 +393,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -417,7 +417,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -477,7 +477,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -507,7 +507,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -565,7 +565,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -600,7 +600,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -630,7 +630,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -687,7 +687,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -710,7 +710,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -733,9 +733,18 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Integer has a property openapi_schema_validator that the latter does not.\n", + "Component \"task_factory_a\" is expecting field_o to be type(Integer), but the passed argument is type({'Integer': {'openapi_schema_validator': {'type': 'integer'}}})\n" + ] + } + ], "source": [ "@component\n", "def task_factory_a(field_m: {'GCSPath': {'openapi_schema_validator': '{\"type\": \"string\", \"pattern\": \"^gs://.*$\"}'}}, field_o: 'Integer'):\n", @@ -751,7 +760,7 @@ "# Pipeline input types are also checked against the component I/O types.\n", "@dsl.pipeline(name='type_check_g',\n", " description='')\n", - "def pipeline_g(a: {'GCSPath': {'openapi_schema_validator': '{\"type\": \"string\", \"pattern\": \"^gs://.*$\"}'}}='good', b: Integer()=12):\n", + "def pipeline_g(a: {'GCSPath': {'openapi_schema_validator': '{\"type\": \"string\", \"pattern\": \"^gs://.*$\"}'}}='gs://kfp-path', b: Integer()=12):\n", " task_factory_a(field_m=a, field_o=b)\n", "\n", "try:\n", @@ -769,7 +778,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ diff --git a/samples/notebooks/Lightweight Python components - basics.ipynb b/samples/notebooks/Lightweight Python components - basics.ipynb index 87a95852b11..d66b032e928 100644 --- a/samples/notebooks/Lightweight Python components - basics.ipynb +++ b/samples/notebooks/Lightweight Python components - basics.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -80,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -96,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -154,9 +154,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "MyDivmodOutput(quotient=14, remainder=2)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "my_divmod(100, 7)" ] @@ -172,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -189,7 +200,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -223,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -278,7 +289,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.4rc1" + "version": "3.6.5" } }, "nbformat": 4, From 19e2a3c3b4d4d48490d52be0a6c71b791f16a884 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Mon, 3 Jun 2019 14:41:59 -0700 Subject: [PATCH 7/7] add jsonschema in requirements such that sphinx works fine --- sdk/python/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt index 1a9c21d69dd..4c6e5a59016 100644 --- a/sdk/python/requirements.txt +++ b/sdk/python/requirements.txt @@ -11,3 +11,4 @@ google-auth>=1.6.1 requests_toolbelt>=0.8.0 kfp-server-api >= 0.1.18, < 0.1.19 argo-models == 2.2.1a +jsonschema >= 3.0.1