From 0cb22179348ab9da952381980487cb8d75914d52 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 23 Oct 2023 09:04:43 -0700 Subject: [PATCH 01/25] chore(components): update GCPC custom job docstrings PiperOrigin-RevId: 575835146 --- .../v1/custom_job/utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py index 4faba30c68f..e83d374175e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py @@ -72,27 +72,27 @@ def create_custom_training_job_from_component( # fmt: off """Convert a KFP component into Vertex AI [custom training job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) using the [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs) API. - This utility converts a [KFP component ](https://www.kubeflow.org/docs/components/pipelines/v2/components/) provided to `component_spec` into `CustomTrainingJobOp` component. Your components inputs, outputs, and logic are carried over, with additional [CustomJob ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) parameters exposed. Note that this utility constructs a ClusterSpec where the master and all the workers use the same spec, meaning all disk/machine spec related parameters will apply to all replicas. This is suitable for uses cases such as executing a training component over multiple replicas with [MultiWorkerMirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) or [MirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). See [Create custom training jobs ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. + This utility converts a [KFP component](https://www.kubeflow.org/docs/components/pipelines/v2/components/) provided to `component_spec` into a `CustomTrainingJobOp` component. Your component's inputs, outputs, and logic are carried over, with additional [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) parameters exposed. Note that this utility constructs a ClusterSpec where the master and all the workers use the same spec, meaning all disk/machine spec related parameters will apply to all replicas. This is suitable for use cases such as executing a training component over multiple replicas with [MultiWorkerMirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) or [MirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). See [Create custom training jobs](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. Args: component_spec: A KFP component. display_name: The name of the CustomJob. If not provided the component's name will be used instead. - replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information. ](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) - machine_type: The type of the machine to run the CustomJob.
The default value is "n1-standard-4". See [more information ](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). - accelerator_type: The type of accelerator(s) that may be attached to the machine per `accelerator_count`. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype). + replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information.](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) + machine_type: The type of the machine to run the CustomJob. The default value is "n1-standard-4". See [more information](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). + accelerator_type: The type of accelerator(s) that may be attached to the machine per `accelerator_count`. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype). accelerator_count: The number of accelerators to attach to the machine. Defaults to 1 if `accelerator_type` is set. boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent Disk Hard Disk Drive). boot_disk_type is set as a static value and cannot be changed as a pipeline parameter. boot_disk_size_gb: Size in GB of the boot disk (default is 100GB). `boot_disk_size_gb` is set as a static value and cannot be changed as a pipeline parameter. timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s". restart_job_on_worker_restart: Restarts the entire CustomJob if a worker gets restarted. This feature can be used by distributed training jobs that are not resilient to workers leaving and joining a job. - service_account: Sets the default service account for workload run-as account. The [service account ](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. + service_account: Sets the default service account for workload run-as account. The [service account](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `12345`, and `{network}` is a network name. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. 
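Example (a hedged sketch of the conversion this docstring describes; the component body and all argument values below are hypothetical placeholders, not part of this patch):

    from kfp import dsl
    from google_cloud_pipeline_components.v1.custom_job import utils

    @dsl.component
    def train(epochs: int):
        # Stand-in training step; a real component would train a model here.
        print(f'training for {epochs} epochs')

    # Wrap the component so it runs as a Vertex AI CustomJob; the machine and
    # disk settings apply to every replica, per the ClusterSpec note above.
    custom_train = utils.create_custom_training_job_from_component(
        train,
        display_name='example-training-job',
        machine_type='n1-standard-4',
        replica_count=1,
    )

    @dsl.pipeline(name='example-training-pipeline')
    def pipeline():
        custom_train(epochs=10)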
tensorboard: The name of a Vertex AI TensorBoard resource to which this CustomJob will upload TensorBoard logs. - enable_web_access: Whether you want Vertex AI to enable [interactive shell access ](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. + enable_web_access: Whether you want Vertex AI to enable [interactive shell access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC network that can be used for this job. If set, we will deploy the job within the provided IP ranges. Otherwise, the job will be deployed to any IP ranges under the provided VPC network. - nfs_mounts: A list of [NfsMount ](https://cloud.devsite.corp.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training ](https://cloud.devsite.corp.google.com/vertex-ai/docs/training/train-nfs-share). - base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). + nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share). + base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf). Returns: From 2054b7c45d4831c787115563c8be0048abcb9be1 Mon Sep 17 00:00:00 2001 From: Magdalena Kuhn <139039524+magdalenakuhn17@users.noreply.github.com> Date: Tue, 24 Oct 2023 00:06:06 +0200 Subject: [PATCH 02/25] feat(components) Extend kserve component (#10136) * add runtime version, resource requests and resource limits * adjust kservedeployer * Update components/kserve/src/kservedeployer.py Co-authored-by: Tommy Li * Update components/kserve/src/kservedeployer.py Co-authored-by: Tommy Li * Update components/kserve/src/kservedeployer.py Co-authored-by: Tommy Li --------- Co-authored-by: Tommy Li --- components/kserve/README.md | 4 +- components/kserve/component.yaml | 40 +++++++++++-------- components/kserve/src/kservedeployer.py | 52 ++++++++++++++++++++----- 3 files changed, 69 insertions(+), 27 deletions(-) diff --git a/components/kserve/README.md b/components/kserve/README.md index 66f0e59b9f0..c6a42842efe 100644 --- a/components/kserve/README.md +++ b/components/kserve/README.md @@ -39,6 +39,9 @@ kserve_op = components.load_component_from_url('https://raw.githubusercontent.co | canary_traffic_percent | `100` | The traffic split percentage between the candidate model and the last ready model | | namespace | | Kubernetes namespace where the KServe service is deployed. 
If no namespace is provided, `anonymous` will be used unless a namespace is provided in the `inferenceservice_yaml` argument. | | framework | | Machine learning framework for model serving. Currently the supported frameworks are `tensorflow`, `pytorch`, `sklearn`, `xgboost`, `onnx`, `triton`, `pmml`, and `lightgbm`. | +| runtime_version | `latest` | Runtime Version of Machine Learning Framework | +| resource_requests | `{"cpu": "0.5", "memory": "512Mi"}` | CPU and Memory requests for Model Serving | +| resource_limits | `{"cpu": "1", "memory": "1Gi"}` | CPU and Memory limits for Model Serving | | custom_model_spec | `{}` | Custom model runtime container spec in JSON. Sample spec: `{"image": "codait/max-object-detector", "port":5000, "name": "test-container"}` | | inferenceservice_yaml | `{}` | Raw InferenceService serialized YAML for deployment. Use this if you need additional configurations for your InferenceService. | | autoscaling_target | `0` | Autoscaling Target Number. If not 0, sets the following annotation on the InferenceService: `autoscaling.knative.dev/target` | @@ -185,4 +188,3 @@ kserve_op( inferenceservice_yaml=isvc_yaml ) ``` - diff --git a/components/kserve/component.yaml b/components/kserve/component.yaml index 4bdcaac7b56..9d7b97e3e23 100644 --- a/components/kserve/component.yaml +++ b/components/kserve/component.yaml @@ -1,25 +1,28 @@ name: Serve a model with KServe description: Serve Models using KServe inputs: - - {name: Action, type: String, default: 'create', description: 'Action to execute on KServe'} - - {name: Model Name, type: String, default: '', description: 'Name to give to the deployed model'} - - {name: Model URI, type: String, default: '', description: 'Path of the S3 or GCS compatible directory containing the model.'} - - {name: Canary Traffic Percent, type: String, default: '100', description: 'The traffic split percentage between the candidate model and the last ready model'} - - {name: Namespace, type: String, default: '', description: 'Kubernetes namespace where the KServe service is deployed.'} - - {name: Framework, type: String, default: '', description: 'Machine Learning Framework for Model Serving.'} - - {name: Custom Model Spec, type: String, default: '{}', description: 'Custom model runtime container spec in JSON'} - - {name: Autoscaling Target, type: String, default: '0', description: 'Autoscaling Target Number'} - - {name: Service Account, type: String, default: '', description: 'ServiceAccount to use to run the InferenceService pod'} - - {name: Enable Istio Sidecar, type: Bool, default: 'True', description: 'Whether to enable istio sidecar injection'} - - {name: InferenceService YAML, type: String, default: '{}', description: 'Raw InferenceService serialized YAML for deployment'} - - {name: Watch Timeout, type: String, default: '300', description: "Timeout seconds for watching until InferenceService becomes ready."} - - {name: Min Replicas, type: String, default: '-1', description: 'Minimum number of InferenceService replicas'} - - {name: Max Replicas, type: String, default: '-1', description: 'Maximum number of InferenceService replicas'} - - {name: Request Timeout, type: String, default: '60', description: "Specifies the number of seconds to wait before timing out a request to the component."} - - {name: Enable ISVC Status, type: Bool, default: 'True', description: "Specifies whether to store the inference service status as the output parameter"} + - {name: Action, type: String, default: 'create', description: 'Action to execute on KServe'} 
+ - {name: Model Name, type: String, default: '', description: 'Name to give to the deployed model'} + - {name: Model URI, type: String, default: '', description: 'Path of the S3 or GCS compatible directory containing the model.'} + - {name: Canary Traffic Percent, type: String, default: '100', description: 'The traffic split percentage between the candidate model and the last ready model'} + - {name: Namespace, type: String, default: '', description: 'Kubernetes namespace where the KServe service is deployed.'} + - {name: Framework, type: String, default: '', description: 'Machine Learning Framework for Model Serving.'} + - {name: Runtime Version, type: String, default: 'latest', description: 'Runtime Version of Machine Learning Framework'} + - {name: Resource Requests, type: String, default: '{"cpu": "0.5", "memory": "512Mi"}', description: 'CPU and Memory requests for Model Serving'} + - {name: Resource Limits, type: String, default: '{"cpu": "1", "memory": "1Gi"}', description: 'CPU and Memory limits for Model Serving'} + - {name: Custom Model Spec, type: String, default: '{}', description: 'Custom model runtime container spec in JSON'} + - {name: Autoscaling Target, type: String, default: '0', description: 'Autoscaling Target Number'} + - {name: Service Account, type: String, default: '', description: 'ServiceAccount to use to run the InferenceService pod'} + - {name: Enable Istio Sidecar, type: Bool, default: 'True', description: 'Whether to enable istio sidecar injection'} + - {name: InferenceService YAML, type: String, default: '{}', description: 'Raw InferenceService serialized YAML for deployment'} + - {name: Watch Timeout, type: String, default: '300', description: "Timeout seconds for watching until InferenceService becomes ready."} + - {name: Min Replicas, type: String, default: '-1', description: 'Minimum number of InferenceService replicas'} + - {name: Max Replicas, type: String, default: '-1', description: 'Maximum number of InferenceService replicas'} + - {name: Request Timeout, type: String, default: '60', description: "Specifies the number of seconds to wait before timing out a request to the component."} + - {name: Enable ISVC Status, type: Bool, default: 'True', description: "Specifies whether to store the inference service status as the output parameter"} outputs: - - {name: InferenceService Status, type: String, description: 'Status JSON output of InferenceService'} + - {name: InferenceService Status, type: String, description: 'Status JSON output of InferenceService'} implementation: container: image: quay.io/aipipeline/kserve-component:v0.11.1 @@ -32,6 +35,9 @@ implementation: --canary-traffic-percent, {inputValue: Canary Traffic Percent}, --namespace, {inputValue: Namespace}, --framework, {inputValue: Framework}, + --runtime-version, {inputValue: Runtime Version}, + --resource-requests, {inputValue: Resource Requests}, + --resource-limits, {inputValue: Resource Limits}, --custom-model-spec, {inputValue: Custom Model Spec}, --autoscaling-target, {inputValue: Autoscaling Target}, --service-account, {inputValue: Service Account}, diff --git a/components/kserve/src/kservedeployer.py b/components/kserve/src/kservedeployer.py index db84e41727e..c8799332f76 100644 --- a/components/kserve/src/kservedeployer.py +++ b/components/kserve/src/kservedeployer.py @@ -21,6 +21,7 @@ import yaml from kubernetes import client +from kubernetes.client.models import V1ResourceRequirements from kserve import constants from kserve import KServeClient @@ -50,8 +51,9 @@ } -def 
create_predictor_spec(framework, storage_uri, canary_traffic_percent, - service_account, min_replicas, max_replicas, containers, request_timeout): +def create_predictor_spec(framework, runtime_version, resource_requests, resource_limits, + storage_uri, canary_traffic_percent, service_account, min_replicas, + max_replicas, containers, request_timeout): """ Create and return V1beta1PredictorSpec to be used in a V1beta1InferenceServiceSpec object. @@ -81,7 +83,14 @@ def create_predictor_spec(framework, storage_uri, canary_traffic_percent, setattr( predictor_spec, framework, - AVAILABLE_FRAMEWORKS[framework](storage_uri=storage_uri) + AVAILABLE_FRAMEWORKS[framework]( + storage_uri=storage_uri, + resources=V1ResourceRequirements( + requests=resource_requests, + limits=resource_limits + ), + runtime_version=runtime_version + ) ) return predictor_spec @@ -178,10 +187,10 @@ def submit_api_request(kserve_client, action, name, isvc, namespace=None, return outputs -def perform_action(action, model_name, model_uri, canary_traffic_percent, namespace, - framework, custom_model_spec, service_account, inferenceservice_yaml, - request_timeout, autoscaling_target=0, enable_istio_sidecar=True, - watch_timeout=300, min_replicas=0, max_replicas=0): +def perform_action(action, model_name, model_uri, canary_traffic_percent, namespace, framework, + runtime_version, resource_requests, resource_limits, custom_model_spec, + service_account, inferenceservice_yaml, request_timeout, autoscaling_target=0, + enable_istio_sidecar=True, watch_timeout=300, min_replicas=0, max_replicas=0): """ Perform the specified action. If the action is not 'delete' and `inferenceService_yaml` was provided, the dict representation of the YAML will be sent directly to the @@ -224,8 +233,9 @@ def perform_action(action, model_name, model_uri, canary_traffic_percent, namesp # Build the V1beta1PredictorSpec. 
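# (With the signature change above, this call now also receives
# runtime_version plus the resource_requests / resource_limits dicts;
# create_predictor_spec wraps those dicts in a V1ResourceRequirements on the
# framework predictor, so e.g. {"cpu": "0.5", "memory": "512Mi"} becomes the
# serving container's resource requests.)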
predictor_spec = create_predictor_spec( - framework, model_uri, canary_traffic_percent, service_account, - min_replicas, max_replicas, containers, request_timeout + framework, runtime_version, resource_requests, resource_limits, + model_uri, canary_traffic_percent, service_account, min_replicas, + max_replicas, containers, request_timeout ) isvc = create_inference_service(metadata, predictor_spec) @@ -287,6 +297,24 @@ def main(): str(list(AVAILABLE_FRAMEWORKS.keys())), default="" ) + parser.add_argument( + "--runtime-version", + type=str, + help="Runtime Version of Machine Learning Framework", + default="latest" + ) + parser.add_argument( + "--resource-requests", + type=json.loads, + help="CPU and Memory requests for Model Serving", + default='{"cpu": "0.5", "memory": "512Mi"}', + ) + parser.add_argument( + "--resource-limits", + type=json.loads, + help="CPU and Memory limits for Model Serving", + default='{"cpu": "1", "memory": "1Gi"}', + ) parser.add_argument( "--custom-model-spec", type=json.loads, @@ -342,6 +370,9 @@ def main(): canary_traffic_percent = int(args.canary_traffic_percent) namespace = args.namespace framework = args.framework.lower() + runtime_version = args.runtime_version.lower() + resource_requests = args.resource_requests + resource_limits = args.resource_limits output_path = args.output_path custom_model_spec = args.custom_model_spec autoscaling_target = int(args.autoscaling_target) @@ -381,6 +412,9 @@ def main(): canary_traffic_percent=canary_traffic_percent, namespace=namespace, framework=framework, + runtime_version=runtime_version, + resource_requests=resource_requests, + resource_limits=resource_limits, custom_model_spec=custom_model_spec, autoscaling_target=autoscaling_target, service_account=service_account, From 21079b5910e597a38b67853f3ecfb3929344371e Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 23 Oct 2023 17:32:20 -0700 Subject: [PATCH 03/25] feat(components): [text2sql] Implement preprocess component logic PiperOrigin-RevId: 575976269 --- .../model_evaluation/text2sql_preprocess/component.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/component.py index 4f9aa155d31..583da4c23bb 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/component.py @@ -24,7 +24,7 @@ @container_component def text2sql_evaluation_preprocess( gcp_resources: OutputPath(str), - model_inference_input_path: OutputPath(str), + model_inference_input_path: OutputPath(list), project: str, location: str, evaluation_data_source_path: str, @@ -72,7 +72,7 @@ def text2sql_evaluation_preprocess( Returns: gcp_resources (str): Serialized gcp_resources proto tracking the custom job. - model_inference_input_path (str): + model_inference_input_path (list): The GCS path to save preprocessed data to run batch prediction to get table names. 
""" From ebb42450d0b07eaa8de35a3f6b70eacb5f26f0d8 Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 23 Oct 2023 17:41:48 -0700 Subject: [PATCH 04/25] feat(components): [text2sql] Generate table names by model batch prediction PiperOrigin-RevId: 575978329 --- .../evaluation_llm_text2sql_pipeline.py | 40 +++++++++++++++++-- .../component.py | 17 +++++--- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py index f9e59493b44..e106efa6981 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py @@ -12,13 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. """Text2SQL evaluation pipeline.""" +from typing import Dict from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model_evaluation.text2sql_evaluation.component import text2sql_evaluation as Text2SQLEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation.text2sql_preprocess.component import text2sql_evaluation_preprocess as Text2SQLEvaluationPreprocessOp from google_cloud_pipeline_components._implementation.model_evaluation.text2sql_validate_and_process.component import text2sql_evaluation_validate_and_process as Text2SQLEvaluationValidateAndProcessOp from google_cloud_pipeline_components.types import artifact_types +from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp import kfp +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER _PIPELINE_NAME = 'evaluation_llm_text2sql_pipeline' @@ -34,6 +37,9 @@ def evaluation_llm_text2sql_pipeline( evaluation_method: str = 'parser', project: str = _placeholders.PROJECT_ID_PLACEHOLDER, location: str = _placeholders.LOCATION_PLACEHOLDER, + model_parameters: Dict[str, str] = {}, + batch_predict_instances_format: str = 'jsonl', + batch_predict_predictions_format: str = 'jsonl', machine_type: str = 'e2-highmem-16', service_account: str = '', network: str = '', @@ -61,6 +67,16 @@ def evaluation_llm_text2sql_pipeline( Default value is the same project used to run the pipeline. location: Optional. The GCP region that runs the pipeline components. Default value is the same location used to run the pipeline. + model_parameters: Optional. The parameters that govern the predictions, e.g. + temperature, + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. If not set, + default to "jsonl". For more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_instances_format: The format in which perdictions are made, + must be one of the Model's supportedInputStorageFormats. If not set, + default to "jsonl". For more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. machine_type: The machine type of this custom job. If not set, defaulted to `e2-highmem-16`. 
More details: https://cloud.google.com/compute/docs/machine-resource @@ -85,7 +101,7 @@ def evaluation_llm_text2sql_pipeline( ) get_vertex_model_task.set_display_name('get-vertex-model') - _ = Text2SQLEvaluationPreprocessOp( + preprocess_task = Text2SQLEvaluationPreprocessOp( project=project, location=location, evaluation_data_source_path=evaluation_data_source_path, @@ -97,12 +113,28 @@ def evaluation_llm_text2sql_pipeline( encryption_spec_key_name=encryption_spec_key_name, ) + batch_predict_table_names_task = ModelBatchPredictOp( + job_display_name='text2sql-batch-predict-table-names-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + model=get_vertex_model_task.outputs['artifact'], + location=location, + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_source_uris=preprocess_task.outputs['model_inference_input_path'], + model_parameters=model_parameters, + gcs_destination_output_uri_prefix=( + f'{PIPELINE_ROOT_PLACEHOLDER}/batch_predict_table_names_output' + ), + encryption_spec_key_name=encryption_spec_key_name, + project=project, + ) + _ = Text2SQLEvaluationValidateAndProcessOp( project=project, location=location, - # TODO(bozhengbz) Add value to model_inference_results_path - # when model batch prediction component is added. - model_inference_results_path='gs://test/model_inference_results.json', + model_inference_type='table_name_case', + model_inference_results_directory=batch_predict_table_names_task.outputs[ + 'gcs_output_directory' + ], tables_metadata_path=tables_metadata_path, prompt_template_path=prompt_template_path, machine_type=machine_type, diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/component.py index bc2deb06d4f..3f1b0972620 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/component.py @@ -16,7 +16,9 @@ from google_cloud_pipeline_components import utils as gcpc_utils from google_cloud_pipeline_components._implementation.model_evaluation import utils from google_cloud_pipeline_components._implementation.model_evaluation import version +from kfp.dsl import Artifact from kfp.dsl import container_component +from kfp.dsl import Input from kfp.dsl import OutputPath from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER @@ -24,10 +26,11 @@ @container_component def text2sql_evaluation_validate_and_process( gcp_resources: OutputPath(str), - model_inference_input_path: OutputPath(str), + model_inference_input_path: OutputPath(list), project: str, location: str, - model_inference_results_path: str, + model_inference_type: str, + model_inference_results_directory: Input[Artifact], tables_metadata_path: str, prompt_template_path: str = '', display_name: str = 'text2sql-evaluation-validate-and-process', @@ -41,8 +44,11 @@ def text2sql_evaluation_validate_and_process( Args: project: Required. The GCP project that runs the pipeline component. location: Required. The GCP region that runs the pipeline component. - model_inference_results_path: Required. The path for json file containing - text2sql model inference results from the last step. + model_inference_type: Required. 
Model inference type to differentiate + model inference results validation steps, values can be table_name_case + or column_name_case. + model_inference_results_directory: Required. The directory to store all of + the files containing text2sql model inference results from the last step. tables_metadata_path: Required. The path for json file containing database metadata, including table names, schema fields. prompt_template_path: Required. The path for json file containing prompt @@ -86,7 +92,8 @@ f'--text2sql_validate_and_process={True}', f'--project={project}', f'--location={location}', - f'--model_inference_results_path={model_inference_results_path}', + f'--model_inference_type={model_inference_type}', + f'--model_inference_results_directory={model_inference_results_directory.path}', f'--tables_metadata_path={tables_metadata_path}', f'--prompt_template_path={prompt_template_path}', f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', From c0ef67cb8602dd1b9bb80721fe910e9019a399b4 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 24 Oct 2023 09:25:54 -0700 Subject: [PATCH 05/25] chore(components): update GCPC docstrings PiperOrigin-RevId: 576164819 --- .../preview/custom_job/utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py index 9651cc84677..93bc3221a31 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py @@ -73,27 +73,27 @@ def create_custom_training_job_from_component( # fmt: off """Convert a KFP component into Vertex AI [custom training job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) using the [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs) API. - This utility converts a [KFP component ](https://www.kubeflow.org/docs/components/pipelines/v2/components/) provided to `component_spec` into `CustomTrainingJobOp` component. Your components inputs, outputs, and logic are carried over, with additional [CustomJob ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) parameters exposed. Note that this utility constructs a ClusterSpec where the master and all the workers use the same spec, meaning all disk/machine spec related parameters will apply to all replicas. This is suitable for uses cases such as executing a training component over multiple replicas with [MultiWorkerMirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) or [MirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). See [Create custom training jobs ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. + This utility converts a [KFP component](https://www.kubeflow.org/docs/components/pipelines/v2/components/) provided to `component_spec` into a `CustomTrainingJobOp` component. Your component's inputs, outputs, and logic are carried over, with additional [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) parameters exposed. Note that this utility constructs a ClusterSpec where the master and all the workers use the same spec, meaning all disk/machine spec related parameters will apply to all replicas. This is suitable for use cases such as executing a training component over multiple replicas with [MultiWorkerMirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) or [MirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). See [Create custom training jobs](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. Args: component_spec: A KFP component. display_name: The name of the CustomJob. If not provided the component's name will be used instead. - replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information. ](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) - machine_type: The type of the machine to run the CustomJob. The default value is "n1-standard-4". See [more information ](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). - accelerator_type: The type of accelerator(s) that may be attached to the machine per `accelerator_count`. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype). + replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information.](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) + machine_type: The type of the machine to run the CustomJob. The default value is "n1-standard-4". See [more information](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). + accelerator_type: The type of accelerator(s) that may be attached to the machine per `accelerator_count`. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype). accelerator_count: The number of accelerators to attach to the machine. Defaults to 1 if `accelerator_type` is set. boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent Disk Hard Disk Drive). boot_disk_type is set as a static value and cannot be changed as a pipeline parameter. boot_disk_size_gb: Size in GB of the boot disk (default is 100GB). `boot_disk_size_gb` is set as a static value and cannot be changed as a pipeline parameter. timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s". restart_job_on_worker_restart: Restarts the entire CustomJob if a worker gets restarted. This feature can be used by distributed training jobs that are not resilient to workers leaving and joining a job. - service_account: Sets the default service account for workload run-as account. The [service account ](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account.
If unspecified, the Vertex AI Custom Code [Service Agent ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. + service_account: Sets the default service account for workload run-as account. The [service account](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `12345`, and `{network}` is a network name. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. tensorboard: The name of a Vertex AI TensorBoard resource to which this CustomJob will upload TensorBoard logs. - enable_web_access: Whether you want Vertex AI to enable [interactive shell access ](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. + enable_web_access: Whether you want Vertex AI to enable [interactive shell access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC network that can be used for this job. If set, we will deploy the job within the provided IP ranges. Otherwise, the job will be deployed to any IP ranges under the provided VPC network. - nfs_mounts: A list of [NfsMount ](https://cloud.devsite.corp.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training ](https://cloud.devsite.corp.google.com/vertex-ai/docs/training/train-nfs-share). - base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). + nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share). + base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf). persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. 
If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.) From 04aac259a4d860eab1195654c02906a20643e6f0 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 24 Oct 2023 11:49:48 -0700 Subject: [PATCH 06/25] chore(components): add GCPC Python 3.7 EOL warning PiperOrigin-RevId: 576214273 --- .../google_cloud_pipeline_components/__init__.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/__init__.py b/components/google-cloud/google_cloud_pipeline_components/__init__.py index 8489662cf6d..42ec791d4b1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/__init__.py @@ -12,4 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. """Google Cloud Pipeline Components.""" -from google_cloud_pipeline_components.version import __version__ +import sys +import warnings + +if sys.version_info < (3, 8): + warnings.warn( + ( + 'Python 3.7 has reached end-of-life. Google Cloud Pipeline Components' + ' will drop support for Python 3.7 on April 23, 2024. To use new' + ' versions of the KFP SDK after that date, you will need to upgrade' + ' to Python >= 3.8. See https://devguide.python.org/versions/ for' + ' more details.' + ), + FutureWarning, + stacklevel=2, + ) From c383eb64cd0819a412a4535a25e68b2467bc53f1 Mon Sep 17 00:00:00 2001 From: Changyu Zhu Date: Tue, 24 Oct 2023 13:57:24 -0700 Subject: [PATCH 07/25] chore(components): Update AutoML Vision data converter component PiperOrigin-RevId: 576269528 --- .../preview/automl/vision/data_converter.py | 47 +++---------------- 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/data_converter.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/data_converter.py index 6e6b108aa9b..6ccc98dd40d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/data_converter.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/data_converter.py @@ -27,7 +27,7 @@ def data_converter( input_file_path: str, input_file_type: str, objective: str, - output_dir: str, + output_dir: dsl.OutputPath(str), gcp_resources: dsl.OutputPath(str), location: str = 'us-central1', timeout: str = '604800s', @@ -36,10 +36,6 @@ def data_converter( output_shape: Optional[str] = None, split_ratio: Optional[str] = None, num_shard: Optional[str] = None, - output_fps: Optional[int] = None, - num_frames: Optional[int] = None, - min_duration_sec: Optional[float] = None, - pos_neg_ratio: Optional[float] = None, encryption_spec_key_name: str = '', project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): @@ -51,21 +47,17 @@ def data_converter( input_file_path: Input file path. Please refer to different input formats in Vertex AI Documentation. For example, [image classification prepare data](https://cloud.google.com/vertex-ai/docs/image-data/classification/prepare-data) page. input_file_type: 'csv', 'jsonl', or 'coco_json'. Must be one of the input file types supported by the objective. 
objective: One of 'icn', 'iod', 'isg', 'vcn', or 'var'. - output_dir: Cloud Storage directory for storing converted data and pipeline information. location: Location for creating the custom training job. If not set, default to us-central1. timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s". service_account: Sets the default service account for workload run-as account. The [service account](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. machine_type: [Machine type](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types) for the CustomJob. If conversion failed, consider using a machine type with more RAM or splitting dataset into smaller pieces. - output_shape: Video only. Output shape (height,width) for video frames. + output_shape: Output shape (height,width) for images. split_ratio: Proportion of data to split into train/validation/test, separated by comma. num_shard: Number of train/validation/test shards, separated by comma. - output_fps: Video only. Output frames per second. - num_frames: VAR only. Number of frames inside a single video clip window. - min_duration_sec: VAR only. Minimum duration of a video clip annotation in seconds. - pos_neg_ratio: VAR only. Sampling ratio between positive and negative segments. encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. project: Project to create the custom training job in. Defaults to the project in which the PipelineJob is run. Returns: + output_dir: Cloud Storage directory storing converted data and pipeline information. gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the CustomJob. 
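A minimal pipeline wiring for the converter might look like the sketch below (the bucket path and objective value are illustrative placeholders, and the import assumes the module path shown in this diff):

    from kfp import dsl
    from google_cloud_pipeline_components.preview.automl.vision.data_converter import data_converter

    @dsl.pipeline(name='vision-data-conversion')
    def conversion_pipeline():
        # output_dir and gcp_resources are now produced as outputs rather
        # than passed in, per the signature change in this patch.
        data_converter(
            input_file_path='gs://example-bucket/image_classification.csv',
            input_file_type='csv',
            objective='icn',
            location='us-central1',
        )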
""" # fmt: on @@ -99,6 +91,7 @@ def data_converter( ' "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/data-converter",' ), '"args": [', + '"--enable_input_validation","true",', '"--input_file_path", "', input_file_path, '",', @@ -129,30 +122,6 @@ def data_converter( [',"--num_shard","', num_shard, '"'] ), ), - dsl.IfPresentPlaceholder( - input_name='output_fps', - then=dsl.ConcatPlaceholder( - [',"--output_fps","', output_fps, '"'] - ), - ), - dsl.IfPresentPlaceholder( - input_name='num_frames', - then=dsl.ConcatPlaceholder( - [',"--num_frames","', num_frames, '"'] - ), - ), - dsl.IfPresentPlaceholder( - input_name='min_duration_sec', - then=dsl.ConcatPlaceholder( - [',"--min_duration_sec","', min_duration_sec, '"'] - ), - ), - dsl.IfPresentPlaceholder( - input_name='pos_neg_ratio', - then=dsl.ConcatPlaceholder( - [',"--pos_neg_ratio","', pos_neg_ratio, '"'] - ), - ), ']}}],', '"scheduling": {', '"timeout": "', @@ -165,12 +134,8 @@ def data_converter( ['"service_account": "', service_account, '",'] ), ), - '"enable_web_access": false,', - '"base_output_directory": {', - '"output_uri_prefix": "', - output_dir, - '"', - '}},', + '"enable_web_access": false', + '},', '"encryption_spec": {', '"kms_key_name": "', encryption_spec_key_name, From e3b186379186d771316f39257e27d315c2c10a77 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 24 Oct 2023 17:06:08 -0500 Subject: [PATCH 08/25] chore(sdk): add Python 3.7 EOL notice (#10139) --- sdk/RELEASE.md | 1 + sdk/python/kfp/__init__.py | 11 ++++++ sdk/python/kfp/dsl/component_factory.py | 6 ++++ sdk/python/kfp/dsl/component_factory_test.py | 14 ++++++++ sdk/python/kfp/init_test.py | 36 ++++++++++++++++++++ 5 files changed, 68 insertions(+) create mode 100644 sdk/python/kfp/init_test.py diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index 3e43e2633f0..61dfc86d14f 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -6,6 +6,7 @@ * Support collecting outputs from conditional branches using `dsl.OneOf` [\#10067](https://github.com/kubeflow/pipelines/pull/10067) ## Deprecations +* Add notice of Python 3.7 support removal on April 23, 2024 [\#10139](https://github.com/kubeflow/pipelines/pull/10139) ## Bug fixes and other changes * Fix type on `dsl.ParallelFor` sub-DAG output when a `dsl.Collected` is used. Non-functional fix. [\#10069](https://github.com/kubeflow/pipelines/pull/10069) diff --git a/sdk/python/kfp/__init__.py b/sdk/python/kfp/__init__.py index 74d0332f3ba..eb1fce1d7ef 100644 --- a/sdk/python/kfp/__init__.py +++ b/sdk/python/kfp/__init__.py @@ -18,6 +18,17 @@ __version__ = '2.3.0' +import sys +import warnings + +if sys.version_info < (3, 8): + warnings.warn( + ('Python 3.7 has reached end-of-life. KFP will drop support for Python 3.7 on April 23, 2024. To use new versions of the KFP SDK after that date, you will need to upgrade to Python >= 3.8. See https://devguide.python.org/versions/ for more details.' + ), + FutureWarning, + stacklevel=2, + ) + TYPE_CHECK = True import os diff --git a/sdk/python/kfp/dsl/component_factory.py b/sdk/python/kfp/dsl/component_factory.py index 29402dc1315..5df3824e33c 100644 --- a/sdk/python/kfp/dsl/component_factory.py +++ b/sdk/python/kfp/dsl/component_factory.py @@ -532,6 +532,12 @@ def create_component_from_func( args = [] if base_image is None: base_image = _DEFAULT_BASE_IMAGE + warnings.warn( + ("Python 3.7 has reached end-of-life. The default base_image used by the @dsl.component decorator will switch from 'python:3.7' to 'python:3.8' on April 23, 2024. 
To ensure your existing components work with versions of the KFP SDK released after that date, you should provide an explicit base_image argument and ensure your component works as intended on Python 3.8." + ), + FutureWarning, + stacklevel=2, + ) component_image = base_image diff --git a/sdk/python/kfp/dsl/component_factory_test.py b/sdk/python/kfp/dsl/component_factory_test.py index 1b3f388e7f7..0def6344d6b 100644 --- a/sdk/python/kfp/dsl/component_factory_test.py +++ b/sdk/python/kfp/dsl/component_factory_test.py @@ -287,5 +287,19 @@ def comp(output_list: Output[List[Artifact]]): return dsl.ContainerSpec(image='alpine') +class TestPythonEOLWarning(unittest.TestCase): + + def test_default_base_image(self): + + with self.assertWarnsRegex( + FutureWarning, + r"Python 3\.7 has reached end-of-life\. The default base_image used by the @dsl\.component decorator will switch from 'python:3\.7' to 'python:3\.8' on April 23, 2024\. To ensure your existing components work with versions of the KFP SDK released after that date, you should provide an explicit base_image argument and ensure your component works as intended on Python 3\.8\." + ): + + @dsl.component + def foo(): + pass + + if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/init_test.py b/sdk/python/kfp/init_test.py new file mode 100644 index 00000000000..9e6a86598a1 --- /dev/null +++ b/sdk/python/kfp/init_test.py @@ -0,0 +1,36 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +import sys +import unittest +from unittest import mock + + +@mock.patch.object(sys, 'version_info', new=(3, 7, 12, 'final', 0)) +class TestPythonEOLWarning(unittest.TestCase): + + def test(self): + mod = importlib.import_module('kfp') + + with self.assertWarnsRegex( + FutureWarning, + r'Python 3\.7 has reached end-of-life\. KFP will drop support for Python 3\.7 on April 23, 2024\. To use new versions of the KFP SDK after that date, you will need to upgrade to Python >= 3\.8\. See https:\/\/devguide\.python\.org\/versions\/ for more details\.' 
+ ): + # simulate first import from kfp + importlib.reload(mod) + + +if __name__ == '__main__': + unittest.main() From 0d7561199751e83b4d7e1603c3d32d4088a7e208 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 24 Oct 2023 15:17:52 -0700 Subject: [PATCH 09/25] feat(components): [endpoint_batch_predict] Initialize component PiperOrigin-RevId: 576300455 --- .../model_evaluation/__init__.py | 2 + .../endpoint_batch_predict/__init__.py | 14 ++ .../endpoint_batch_predict/component.py | 229 ++++++++++++++++++ 3 files changed, 245 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/component.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index e41a4536031..07520b6f226 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -18,6 +18,7 @@ from google_cloud_pipeline_components._implementation.model_evaluation.chunking.component import chunking as ChunkingOp from google_cloud_pipeline_components._implementation.model_evaluation.data_sampler.component import evaluation_data_sampler as EvaluationDataSamplerOp from google_cloud_pipeline_components._implementation.model_evaluation.dataset_preprocessor.component import dataset_preprocessor_error_analysis as EvaluationDatasetPreprocessorOp +from google_cloud_pipeline_components._implementation.model_evaluation.endpoint_batch_predict.component import evaluation_llm_endpoint_batch_predict_pipeline_graph_component from google_cloud_pipeline_components._implementation.model_evaluation.error_analysis_annotation.component import error_analysis_annotation as ErrorAnalysisAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.evaluated_annotation.component import evaluated_annotation as EvaluatedAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.feature_attribution.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp @@ -41,6 +42,7 @@ 'evaluation_llm_safety_bias_pipeline', 'evaluation_llm_embedding_pipeline', 'evaluation_llm_text2sql_pipeline', + 'evaluation_llm_endpoint_batch_predict_pipeline_graph_component', 'ChunkingOp', 'EvaluationDataSamplerOp', 'EvaluationDatasetPreprocessorOp', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/__init__.py new file mode 100644 index 00000000000..7edc3ee88e3 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Evaluation Endpoint Batch Predict Component.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/component.py new file mode 100644 index 00000000000..acb8048b9ce --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/component.py @@ -0,0 +1,229 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Endpoint batch predict component used in KFP pipelines.""" + +from typing import Dict, NamedTuple, Optional, Union +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.model_evaluation import utils +from kfp import dsl +from kfp.dsl import container_component +from kfp.dsl import OutputPath +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER + +_IMAGE_URI = 'gcr.io/model-evaluation-dev/llm_eval:wjess-test' + + +@dsl.component +def add_json_escape_parameters(parameters: dict) -> str: + import json + + json_escaped_parameters = json.dumps(parameters).replace('"', '\\"') + return json_escaped_parameters + + +@container_component +def endpoint_batch_predict( + gcp_resources: OutputPath(str), + gcs_output_directory: OutputPath(str), + project: str, + location: str, + source_gcs_uri: str, + model_parameters: Optional[str] = None, + gcs_destination_output_uri_prefix: Optional[str] = '', + endpoint_id: Optional[str] = None, + publisher_model: Optional[str] = None, + qms_override: Optional[str] = None, + display_name: str = 'endpoint_batch_predict', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + encryption_spec_key_name: str = '', +): + """Returns the batch prediction results for a given batch of instances. + + Args: + project: Required. The GCP project that runs the pipeline component. + location: Required. The GCP region that runs the pipeline component. + source_gcs_uri: Google Cloud Storage URI to your instances to run + prediction on. The stored file format should be jsonl and each line + contains one Prediction instance. Instance should match Deployed model's + instance schema + gcs_destination_output_uri_prefix: The Google Cloud Storage location of + the directory where the output is to be written to. In the given + directory a new directory is created. 
Its name is
+      `prediction-model-`, where timestamp is in
+      YYYY-MM-DD-hh:mm:ss.sss format. Inside of it is a file named results.jsonl.
+    endpoint_id: Required if no publisher_model is provided. The Endpoint ID
+      of the deployed LLM to serve the prediction. When endpoint_id and
+      publisher_model are both provided, publisher_model will be used.
+    model_parameters: The parameters that govern the prediction.
+    publisher_model: Required if no endpoint_id is provided. Name of the
+      Publisher model.
+    qms_override: Manual override of a large language model's qms. Raise it
+      when there is an approved quota increase for an LLM; lower it to limit
+      the qms of an LLM for this pipeline. Should be provided as a
+      dictionary, for example {'text-bison': 20}. For a deployed model that
+      doesn't have the google-vertex-llm-tuning-base-model-id label, override
+      the default here.
+    display_name: The name of the Evaluation job.
+    machine_type: The machine type of this custom job. If not set, defaulted
+      to `e2-highmem-16`. More details:
+      https://cloud.google.com/compute/docs/machine-resource
+    service_account: Sets the default service account for workload run-as
+      account. The service account running the pipeline
+      (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account)
+      submitting jobs must have act-as permission on this run-as account. If
+      unspecified, the Vertex AI Custom Code Service Agent
+      (https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)
+      for the CustomJob's project is used.
+    network: The full name of the Compute Engine network to which the job
+      should be peered. For example, projects/12345/global/networks/myVPC.
+      Format is of the form projects/{project}/global/networks/{network}.
+      Where {project} is a project number, as in 12345, and {network} is a
+      network name. Private services access must already be configured for the
+      network. If left unspecified, the job is not peered with any network.
+    encryption_spec_key_name: Customer-managed encryption key options for the
+      CustomJob. If this is set, then all resources created by the CustomJob
+      will be encrypted with the provided encryption key.
+
+  Returns:
+    gcp_resources (str):
+      Serialized gcp_resources proto tracking the custom job.
+    gcs_output_directory (str):
+      GCS directory where endpoint batch prediction results are stored.
+  """
+  return gcpc_utils.build_serverless_customjob_container_spec(
+      project=project,
+      location=location,
+      custom_job_payload=utils.build_custom_job_payload(
+          display_name=display_name,
+          machine_type=machine_type,
+          image_uri=_IMAGE_URI,
+          args=[
+              f'--endpoint_batch_predict={True}',
+              f'--project={project}',
+              f'--location={location}',
+              f'--source_gcs_uri={source_gcs_uri}',
+              f'--model_parameters={model_parameters}',
+              f'--gcs_destination_output_uri_prefix={gcs_destination_output_uri_prefix}',
+              f'--endpoint_id={endpoint_id}',
+              f'--publisher_model={publisher_model}',
+              f'--qms_override={qms_override}',
+              f'--gcs_output_directory={gcs_output_directory}',
+              f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}',
+              f'--gcp_resources={gcp_resources}',
+              '--executor_input={{$.json_escape[1]}}',
+          ],
+          service_account=service_account,
+          network=network,
+          encryption_spec_key_name=encryption_spec_key_name,
+      ),
+      gcp_resources=gcp_resources,
+  )
+
+
+@dsl.pipeline(name='EvaluationLLMEndpointBatchPredictOp')
+def evaluation_llm_endpoint_batch_predict_pipeline_graph_component(
+    project: str,
+    location: str,
+    source_gcs_uri: str,
+    model_parameters: Optional[Dict[str, Union[int, float]]] = {},
+    gcs_destination_output_uri_prefix: Optional[str] = '',
+    endpoint_id: Optional[str] = None,
+    publisher_model: Optional[str] = None,
+    qms_override: Optional[str] = None,
+    display_name: str = 'endpoint_batch_predict',
+    machine_type: str = 'e2-highmem-16',
+    service_account: str = '',
+    network: str = '',
+    encryption_spec_key_name: str = '',
+) -> NamedTuple('outputs', gcs_output_directory=str):
+  """The LLM Evaluation Endpoint Batch Predict Pipeline.
+
+  Args:
+    project: Required. The GCP project that runs the pipeline components.
+    location: Required. The GCP region that runs the pipeline components.
+    source_gcs_uri: Google Cloud Storage URI to your instances to run prediction
+      on. The stored file format should be jsonl and each line contains one
+      Prediction instance. Instances should match the deployed model's instance
+      schema.
+    gcs_destination_output_uri_prefix: The Google Cloud Storage location of the
+      directory where the output is to be written to. In the given directory a
+      new directory is created. Its name is
+      `prediction-model-`, where timestamp is in
+      YYYY-MM-DD-hh:mm:ss.sss format. Inside of it is a file named results.jsonl.
+    endpoint_id: Required if no publisher_model is provided. The Endpoint ID of
+      the deployed LLM to serve the prediction. When endpoint_id and
+      publisher_model are both provided, publisher_model will be used.
+    model_parameters: The parameters that govern the prediction.
+    publisher_model: Required if no endpoint_id is provided. Name of the
+      Publisher model.
+    qms_override: Manual override of a large language model's qms. Raise it
+      when there is an approved quota increase for an LLM; lower it to limit
+      the qms of an LLM for this pipeline. Should be provided as a dictionary,
+      for example {'text-bison': 20}. For a deployed model that doesn't have
+      the google-vertex-llm-tuning-base-model-id label, override the default
+      here.
+    display_name: The name of the Evaluation job.
+    machine_type: The machine type of this custom job. If not set, defaulted to
+      `e2-highmem-16`. More details:
+      https://cloud.google.com/compute/docs/machine-resource
+    service_account: Sets the default service account for workload run-as
+      account. The service account running the pipeline
+      (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account)
+      submitting jobs must have act-as permission on this run-as account. If
+      unspecified, the Vertex AI Custom Code Service Agent
+      (https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)
+      for the CustomJob's project is used.
+    network: The full name of the Compute Engine network to which the job should
+      be peered. For example, projects/12345/global/networks/myVPC. Format is of
+      the form projects/{project}/global/networks/{network}. Where {project} is
+      a project number, as in 12345, and {network} is a network name. Private
+      services access must already be configured for the network. If left
+      unspecified, the job is not peered with any network.
+    encryption_spec_key_name: Customer-managed encryption key options for the
+      CustomJob. If this is set, then all resources created by the CustomJob
+      will be encrypted with the provided encryption key.
+
+  Returns:
+    NamedTuple:
+      gcs_output_directory (str):
+        GCS directory where endpoint batch prediction results are stored.
+  """
+  outputs = NamedTuple('outputs', gcs_output_directory=str)
+
+  endpoint_batch_predict_task = endpoint_batch_predict(
+      project=project,
+      location=location,
+      source_gcs_uri=source_gcs_uri,
+      model_parameters=add_json_escape_parameters(
+          parameters=model_parameters
+      ).output,
+      gcs_destination_output_uri_prefix=gcs_destination_output_uri_prefix,
+      endpoint_id=endpoint_id,
+      publisher_model=publisher_model,
+      qms_override=qms_override,
+      display_name=display_name,
+      machine_type=machine_type,
+      service_account=service_account,
+      network=network,
+      encryption_spec_key_name=encryption_spec_key_name,
+  )
+
+  return outputs(
+      gcs_output_directory=endpoint_batch_predict_task.outputs[
+          'gcs_output_directory'
+      ]
+  )
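A note on the `add_json_escape_parameters` helper introduced above: the pipeline passes `model_parameters` to the container as the string value of the `--model_parameters` flag, so the dict is first serialized to JSON with its double quotes escaped, which lets the string survive another round of JSON quoting downstream (compare the `--executor_input={{$.json_escape[1]}}` placeholder in the container args). A minimal standalone sketch of the escaping logic follows; the parameter values are hypothetical, not taken from this patch.

```python
# What add_json_escape_parameters computes, reproduced outside KFP.
import json

parameters = {'temperature': 0.2, 'maxOutputTokens': 256}  # illustrative values only
escaped = json.dumps(parameters).replace('"', '\\"')
print(escaped)  # {\"temperature\": 0.2, \"maxOutputTokens\": 256}
```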
From 570e56dd09af32e173cf041eed7497e4533ec186 Mon Sep 17 00:00:00 2001
From: Googler
Date: Wed, 25 Oct 2023 10:42:41 -0700
Subject: [PATCH 10/25] fix(components): [text2sql] Turn
 model_inference_results_path to model_inference_results_directory and remove
 duplicate comment

PiperOrigin-RevId: 576576299
---
 .../text2sql/evaluation_llm_text2sql_pipeline.py      | 12 +++++-------
 .../text2sql_evaluation/component.py                  | 10 ++++++----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py
index e106efa6981..6f0af29e52c 100644
--- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py
+++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py
@@ -73,10 +73,6 @@ def evaluation_llm_text2sql_pipeline(
       must be one of the Model's supportedInputStorageFormats. If not set,
       default to "jsonl". For more details about this input config, see
       https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.
-    batch_predict_instances_format: The format in which perdictions are made,
-      must be one of the Model's supportedInputStorageFormats. If not set,
-      default to "jsonl". For more details about this input config, see
-      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.
     machine_type: The machine type of this custom job. If not set, defaulted
       to `e2-highmem-16`. More details:
       https://cloud.google.com/compute/docs/machine-resource
@@ -148,9 +144,11 @@ def evaluation_llm_text2sql_pipeline(
         location=location,
         sql_dialect=sql_dialect,
         evaluation_method=evaluation_method,
-        # TODO(bozhengbz) Add value to model_inference_results_path
-        # when model batch prediction component is added.
-        model_inference_results_path='gs://test/model_inference_results.json',
+        # TODO(bozhengbz) Change value to model_inference_results_directory
+        # when sql query model batch prediction component is added.
+        model_inference_results_directory=batch_predict_table_names_task.outputs[
+            'gcs_output_directory'
+        ],
         tables_metadata_path=tables_metadata_path,
         machine_type=machine_type,
         service_account=service_account,
diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/component.py
index 063172067a2..a084de02d42 100644
--- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/component.py
+++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/component.py
@@ -16,7 +16,9 @@
 from google_cloud_pipeline_components import utils as gcpc_utils
 from google_cloud_pipeline_components._implementation.model_evaluation import utils
 from google_cloud_pipeline_components._implementation.model_evaluation import version
+from kfp.dsl import Artifact
 from kfp.dsl import container_component
+from kfp.dsl import Input
 from kfp.dsl import Metrics
 from kfp.dsl import Output
 from kfp.dsl import OutputPath
@@ -33,7 +35,7 @@ def text2sql_evaluation(
     location: str,
     sql_dialect: str,
     evaluation_method: str,
-    model_inference_results_path: str,
+    model_inference_results_directory: Input[Artifact],
     tables_metadata_path: str,
     display_name: str = 'text2sql-evaluation',
     machine_type: str = 'e2-highmem-16',
@@ -49,8 +51,8 @@ def text2sql_evaluation(
     sql_dialect: Required. SQL dialect type, e.g. bigquery, mysql, etc.
     evaluation_method: Required. Text2SQL evaluation method, value can be
       'parser', 'execution', 'all'.
-    model_inference_results_path: Required. The path for json file containing
-      text2sql model inference results from the last step.
+    model_inference_results_directory: Required. The directory containing the
+      JSON file(s) with text2sql model inference results from the last step.
     tables_metadata_path: Required. The path for json file containing database
       metadata, including table names, schema fields.
     display_name: The name of the Evaluation job.
@@ -98,7 +100,7 @@ def text2sql_evaluation( f'--location={location}', f'--sql_dialect={sql_dialect}', f'--evaluation_method={evaluation_method}', - f'--model_inference_results_path={model_inference_results_path}', + f'--model_inference_results_directory={model_inference_results_directory.path}', f'--tables_metadata_path={tables_metadata_path}', f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', f'--gcp_resources={gcp_resources}', From c83329f69c57cf7ecd03703e192878522c4d23fa Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 25 Oct 2023 11:18:57 -0700 Subject: [PATCH 11/25] chore(components): fix GCPC markdown docstrings rendering PiperOrigin-RevId: 576588522 --- .../v1/automl/training_job/__init__.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py index 1f5612bcc4e..fa2f7099f19 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py @@ -11,11 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Create [Vertex AI AutoML training jobs](https://cloud.google.com/vertex- - -ai/docs/beginner/beginners-guide) for image, text, video, and forecasting. -""" - +# fmt: off +"""Create [Vertex AI AutoML training jobs](https://cloud.google.com/vertex-ai/docs/beginner/beginners-guide) for image, text, video, and forecasting.""" +# fmt: on from google_cloud_pipeline_components.v1.automl.training_job.automl_forecasting_training_job.component import automl_forecasting_training_job as AutoMLForecastingTrainingJobRunOp from google_cloud_pipeline_components.v1.automl.training_job.automl_image_training_job.component import automl_image_training_job as AutoMLImageTrainingJobRunOp from google_cloud_pipeline_components.v1.automl.training_job.automl_tabular_training_job.component import automl_tabular_training_job as AutoMLTabularTrainingJobRunOp From 7ab05d8a84fd295bb1b37285f831bda5bbf55cd2 Mon Sep 17 00:00:00 2001 From: rickyxie0929 <148598858+rickyxie0929@users.noreply.github.com> Date: Wed, 25 Oct 2023 17:29:09 -0700 Subject: [PATCH 12/25] chore(sdk): Remove the ` ()`from docstring args. (#10159) * chore(sdk): Remove the ` ()`from docstring args. Remote the ` ()` from the docstring args, which are redundant since there are type annotations. * Trim the unnecessary leading space. --- sdk/python/kfp/compiler/read_write_test.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sdk/python/kfp/compiler/read_write_test.py b/sdk/python/kfp/compiler/read_write_test.py index 7f33d733945..9be9d4ca2c8 100644 --- a/sdk/python/kfp/compiler/read_write_test.py +++ b/sdk/python/kfp/compiler/read_write_test.py @@ -175,12 +175,12 @@ def test( """Tests serialization and deserialization consistency and correctness. Args: - name (str): '{test_group_name}-{test_case_name}'. Useful for print statements/debugging. - test_case (str): Test case name (without file extension). - test_data_dir (str): The directory containing the test case files. - function (str, optional): The function name to compile. 
- read (bool): Whether the pipeline/component supports deserialization from YAML (IR, except for V1 component YAML back compatability tests). - write (bool): Whether the pipeline/component supports compilation from a Python file. + name: '{test_group_name}-{test_case_name}'. Useful for print statements/debugging. + test_case: Test case name (without file extension). + test_data_dir: The directory containing the test case files. + function: The function name to compile. + read: Whether the pipeline/component supports deserialization from YAML (IR, except for V1 component YAML back compatability tests). + write: Whether the pipeline/component supports compilation from a Python file. """ yaml_file = os.path.join(test_data_dir, f'{test_case}.yaml') py_file = os.path.join(test_data_dir, f'{test_case}.py') From 2882fcf025dd1dae0a5fdd3ba02965ad34d2f326 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 25 Oct 2023 20:26:09 -0500 Subject: [PATCH 13/25] chore(sdk): add pytest.ini file (#10160) * add pytest ini file * Update pytest.ini --- pytest.ini | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000000..a079fdd1c73 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +addopts = --ignore=sdk/python/kfp/deprecated --ignore=sdk/python/kfp/tests +testpaths = sdk/python/kfp From 02e00e8439e9753dbf82856ac9c5a7cec8ce3243 Mon Sep 17 00:00:00 2001 From: Mathew Wicks <5735406+thesuperzapper@users.noreply.github.com> Date: Wed, 25 Oct 2023 18:39:10 -0700 Subject: [PATCH 14/25] fix(sdk): type annotation for client credentials (#10158) --- sdk/python/kfp/client/client.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdk/python/kfp/client/client.py b/sdk/python/kfp/client/client.py index 448433ed9df..bdf9cbdf20f 100644 --- a/sdk/python/kfp/client/client.py +++ b/sdk/python/kfp/client/client.py @@ -32,6 +32,7 @@ from kfp import compiler from kfp.client import auth from kfp.client import set_volume_credentials +from kfp.client.token_credentials_base import TokenCredentialsBase from kfp.dsl import base_component from kfp.pipeline_spec import pipeline_spec_pb2 import kfp_server_api @@ -150,7 +151,7 @@ def __init__( proxy: Optional[str] = None, ssl_ca_cert: Optional[str] = None, kube_context: Optional[str] = None, - credentials: Optional[str] = None, + credentials: Optional[TokenCredentialsBase] = None, ui_host: Optional[str] = None, verify_ssl: Optional[bool] = None, ) -> None: @@ -221,7 +222,7 @@ def _load_config( proxy: Optional[str], ssl_ca_cert: Optional[str], kube_context: Optional[str], - credentials: Optional[str], + credentials: Optional[TokenCredentialsBase], verify_ssl: Optional[bool], ) -> kfp_server_api.Configuration: config = kfp_server_api.Configuration() From 03df9df68c9def59813075dacfa2328d92d008e5 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 25 Oct 2023 20:39:16 -0500 Subject: [PATCH 15/25] chore(sdk): fix local test failure (#10161) --- sdk/python/kfp/cli/component_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/kfp/cli/component_test.py b/sdk/python/kfp/cli/component_test.py index 4d256afaf2b..a818aafb80e 100644 --- a/sdk/python/kfp/cli/component_test.py +++ b/sdk/python/kfp/cli/component_test.py @@ -86,6 +86,7 @@ def setUp(self) -> None: }] self._docker_client.images.push.return_value = [{'status': 'Pushed'}] self.addCleanup(patcher.stop) + self.current_dir = os.path.dirname(os.path.abspath(__file__)) with 
contextlib.ExitStack() as stack: stack.enter_context(self.runner.isolated_filesystem()) @@ -579,8 +580,7 @@ def test_dockerfile_can_contain_custom_kfp_package(self): component = _make_component( func_name='train', target_image='custom-image') _write_components('components.py', component) - current_dir = os.path.dirname(os.path.abspath(__file__)) - package_dir = os.path.dirname(os.path.dirname(current_dir)) + package_dir = os.path.dirname(os.path.dirname(self.current_dir)) # suppresses large stdout from subprocess that builds kfp package with mock.patch.object( From 52f5cf51c4a6c233aae57125561c0fc95c4fd20f Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Thu, 26 Oct 2023 09:16:09 -0700 Subject: [PATCH 16/25] feat(backend): Support consuming parent DAG input artifact (#10162) --- backend/src/v2/driver/driver.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/backend/src/v2/driver/driver.go b/backend/src/v2/driver/driver.go index eda53baad58..d227855ca32 100644 --- a/backend/src/v2/driver/driver.go +++ b/backend/src/v2/driver/driver.go @@ -768,7 +768,11 @@ func resolveInputs(ctx context.Context, dag *metadata.DAG, iterationIndex *int, if err != nil { return nil, err } - glog.Infof("parent DAG input parameters %+v", inputParams) + inputArtifacts, err := mlmd.GetInputArtifactsByExecutionID(ctx, dag.Execution.GetID()) + if err != nil { + return nil, err + } + glog.Infof("parent DAG input parameters: %+v, artifacts: %+v", inputParams, inputArtifacts) inputs = &pipelinespec.ExecutorInput_Inputs{ ParameterValues: make(map[string]*structpb.Value), Artifacts: make(map[string]*pipelinespec.ArtifactList), @@ -998,7 +1002,15 @@ func resolveInputs(ctx context.Context, dag *metadata.DAG, iterationIndex *int, } switch t := artifactSpec.Kind.(type) { case *pipelinespec.TaskInputsSpec_InputArtifactSpec_ComponentInputArtifact: - return nil, artifactError(fmt.Errorf("component input artifact not implemented yet")) + inputArtifactName := artifactSpec.GetComponentInputArtifact() + if inputArtifactName == "" { + return nil, artifactError(fmt.Errorf("component input artifact key is empty")) + } + v, ok := inputArtifacts[inputArtifactName] + if !ok { + return nil, artifactError(fmt.Errorf("parent DAG does not have input artifact %s", inputArtifactName)) + } + inputs.Artifacts[name] = v case *pipelinespec.TaskInputsSpec_InputArtifactSpec_TaskOutputArtifact: taskOutput := artifactSpec.GetTaskOutputArtifact() From 1bee8be071a91f44c0129837c381863327cb337d Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 26 Oct 2023 11:44:41 -0700 Subject: [PATCH 17/25] feat(components): [text2sql] Generate column names by model batch predict PiperOrigin-RevId: 576941675 --- .../evaluation_llm_text2sql_pipeline.py | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py index 6f0af29e52c..f1f591d681b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py @@ -110,7 +110,7 @@ def evaluation_llm_text2sql_pipeline( ) batch_predict_table_names_task = ModelBatchPredictOp( - 
job_display_name='text2sql-batch-predict-table-names-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + job_display_name='text2sql-batch-prediction-table-names-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', model=get_vertex_model_task.outputs['artifact'], location=location, instances_format=batch_predict_instances_format, @@ -124,7 +124,7 @@ def evaluation_llm_text2sql_pipeline( project=project, ) - _ = Text2SQLEvaluationValidateAndProcessOp( + validate_table_names_and_process_task = Text2SQLEvaluationValidateAndProcessOp( project=project, location=location, model_inference_type='table_name_case', @@ -139,6 +139,38 @@ def evaluation_llm_text2sql_pipeline( encryption_spec_key_name=encryption_spec_key_name, ) + batch_predict_column_names_task = ModelBatchPredictOp( + job_display_name='text2sql-batch-prediction-column-names-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + model=get_vertex_model_task.outputs['artifact'], + location=location, + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_source_uris=validate_table_names_and_process_task.outputs[ + 'model_inference_input_path' + ], + model_parameters=model_parameters, + gcs_destination_output_uri_prefix=( + f'{PIPELINE_ROOT_PLACEHOLDER}/batch_predict_column_names_output' + ), + encryption_spec_key_name=encryption_spec_key_name, + project=project, + ) + + _ = Text2SQLEvaluationValidateAndProcessOp( + project=project, + location=location, + model_inference_type='column_name_case', + model_inference_results_directory=batch_predict_column_names_task.outputs[ + 'gcs_output_directory' + ], + tables_metadata_path=tables_metadata_path, + prompt_template_path=prompt_template_path, + machine_type=machine_type, + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ) + _ = Text2SQLEvaluationOp( project=project, location=location, From c0093ecef6bc5f056efa135d019267327115d79d Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Thu, 26 Oct 2023 21:02:10 -0700 Subject: [PATCH 18/25] feat(backend): Update driver and launcher images (#10164) --- backend/src/v2/compiler/argocompiler/argo.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/src/v2/compiler/argocompiler/argo.go b/backend/src/v2/compiler/argocompiler/argo.go index dc9dcd64577..d7c488972ae 100644 --- a/backend/src/v2/compiler/argocompiler/argo.go +++ b/backend/src/v2/compiler/argocompiler/argo.go @@ -116,8 +116,8 @@ func Compile(jobArg *pipelinespec.PipelineJob, kubernetesSpecArg *pipelinespec.S wf: wf, templates: make(map[string]*wfapi.Template), // TODO(chensun): release process and update the images. 
- driverImage: "gcr.io/ml-pipeline/kfp-driver@sha256:fa68f52639b4f4683c9f8f468502867c9663823af0fbcff1cbe7847d5374bf5c", - launcherImage: "gcr.io/ml-pipeline/kfp-launcher@sha256:6641bf94acaeec03ee7e231241800fce2f0ad92eee25371bd5248ca800a086d7", + driverImage: "gcr.io/ml-pipeline/kfp-driver@sha256:8e60086b04d92b657898a310ca9757631d58547e76bbbb8bfc376d654bef1707", + launcherImage: "gcr.io/ml-pipeline/kfp-launcher@sha256:50151a8615c8d6907aa627902dce50a2619fd231f25d1e5c2a72737a2ea4001e", job: job, spec: spec, executors: deploy.GetExecutors(), From 58ce09e07d031964905020c749e77bf0f37e83d4 Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Fri, 27 Oct 2023 04:08:53 +0000 Subject: [PATCH 19/25] chore(release): bumped version to 2.0.3 --- CHANGELOG.md | 42 +++++++++++++++++++ VERSION | 2 +- .../api/v1beta1/python_http_client/README.md | 4 +- .../kfp_server_api/__init__.py | 2 +- .../kfp_server_api/api_client.py | 2 +- .../kfp_server_api/configuration.py | 4 +- .../api/v1beta1/python_http_client/setup.py | 2 +- .../swagger/kfp_api_single_file.swagger.json | 2 +- .../api/v2beta1/python_http_client/README.md | 4 +- .../kfp_server_api/__init__.py | 2 +- .../kfp_server_api/api_client.py | 2 +- .../kfp_server_api/configuration.py | 4 +- .../api/v2beta1/python_http_client/setup.py | 2 +- .../swagger/kfp_api_single_file.swagger.json | 2 +- .../templates/application.yaml | 2 +- manifests/gcp_marketplace/schema.yaml | 4 +- .../base/cache-deployer/kustomization.yaml | 2 +- .../kustomize/base/cache/kustomization.yaml | 2 +- .../generic/pipeline-install-config.yaml | 2 +- .../base/metadata/base/kustomization.yaml | 2 +- .../base/pipeline/kustomization.yaml | 12 +++--- .../metadata-writer/kustomization.yaml | 2 +- .../env/gcp/inverse-proxy/kustomization.yaml | 2 +- 23 files changed, 74 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index db4d260733f..c65dcbe724a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,47 @@ # Changelog +### [2.0.3](https://github.com/kubeflow/pipelines/compare/2.0.2...2.0.3) (2023-10-27) + + +### Features + +* **backend:** Support consuming parent DAG input artifact ([\#10162](https://github.com/kubeflow/pipelines/issues/10162)) ([52f5cf5](https://github.com/kubeflow/pipelines/commit/52f5cf51c4a6c233aae57125561c0fc95c4fd20f)) +* **backend:** Update driver and launcher images ([\#10164](https://github.com/kubeflow/pipelines/issues/10164)) ([c0093ec](https://github.com/kubeflow/pipelines/commit/c0093ecef6bc5f056efa135d019267327115d79d)) +* **components:** [endpoint_batch_predict] Initialize component ([0d75611](https://github.com/kubeflow/pipelines/commit/0d7561199751e83b4d7e1603c3d32d4088a7e208)) +* **components:** [text2sql] Generate column names by model batch predict ([1bee8be](https://github.com/kubeflow/pipelines/commit/1bee8be071a91f44c0129837c381863327cb337d)) +* **components:** [text2sql] Generate table names by model batch prediction ([ebb4245](https://github.com/kubeflow/pipelines/commit/ebb42450d0b07eaa8de35a3f6b70eacb5f26f0d8)) +* **components:** [text2sql] Implement preprocess component logic ([21079b5](https://github.com/kubeflow/pipelines/commit/21079b5910e597a38b67853f3ecfb3929344371e)) +* **components:** [text2sql] Initialize preprocess component and integrate with text2sql pipeline ([9aa750e](https://github.com/kubeflow/pipelines/commit/9aa750e62f6e225d037ecdda9bf7cab95f05675d)) +* **components:** [text2sql] Initialize evaluation component ([ea93979](https://github.com/kubeflow/pipelines/commit/ea93979eed02e131bd20180da149b9465670dfe1)) +* 
**components:** [text2sql] Initialize validate and process component ([633ddeb](https://github.com/kubeflow/pipelines/commit/633ddeb07e9212d2e373dba8d20a0f6d67ab037d)) +* **components:** Add ability to preprocess chat llama datasets to `_implementation.llm.chat_dataset_preprocessor` ([99fd201](https://github.com/kubeflow/pipelines/commit/99fd2017a76660f30d0a04b71542cbef45783633)) +* **components:** Add question_answer support for AutoSxS default instructions ([412216f](https://github.com/kubeflow/pipelines/commit/412216f832a848bfc61ce289aed819d7f2860fdd)) +* **components:** Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation feature attribution pipeline ([d8a0660](https://github.com/kubeflow/pipelines/commit/d8a0660df525f5695015e507e981bceff836dd3d)) +* **components:** Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation pipeline ([0487f9a](https://github.com/kubeflow/pipelines/commit/0487f9a8b1d8ab0d96d757bd4b598ffd353ecc81)) +* **components:** add support for customizing model_parameters in LLM eval text generation and LLM eval text classification pipelines ([d53ddda](https://github.com/kubeflow/pipelines/commit/d53dddab1c8a042e58e06ff6eb38be82fefddb0a)) +* **components:** Make `model_checkpoint` optional for `preview.llm.infer_pipeline` ([e8fb699](https://github.com/kubeflow/pipelines/commit/e8fb6990dfdf036c941c522f9b384ff679b38ca6)) +* **components:** migrate `DataflowFlexTemplateJobOp` to GA namespace (now `v1.dataflow.DataflowFlexTemplateJobOp`) ([faba922](https://github.com/kubeflow/pipelines/commit/faba9223ee846d459f7bb497a6faa3c153dcf430)) +* **components:** Set display names for SFT, RLHF and LLM inference pipelines ([1386a82](https://github.com/kubeflow/pipelines/commit/1386a826ba2bcdbc19eb2007ca43f6acd1031e4d)) +* **components:** Support service account in kubeflow model_batch_predict component ([1682ce8](https://github.com/kubeflow/pipelines/commit/1682ce8adeb2c55a155588eae7492b2f0a8b783a)) +* **components:** Update image tag used by llm pipelines ([4d71fda](https://github.com/kubeflow/pipelines/commit/4d71fdac3fc92dd4d54c6be3a28725667b8f3c5e)) +* **sdk:** support a Pythonic artifact authoring style ([\#9932](https://github.com/kubeflow/pipelines/issues/9932)) ([8d00d0e](https://github.com/kubeflow/pipelines/commit/8d00d0eb9a1442ed994b6a90acea88604efc6423)) +* **sdk:** support collecting outputs from conditional branches using `dsl.OneOf` ([\#10067](https://github.com/kubeflow/pipelines/issues/10067)) ([2d3171c](https://github.com/kubeflow/pipelines/commit/2d3171cbfec626055e59b8a58ce83fb54ecad113)) + + +### Bug Fixes + +* **components:** [text2sql] Turn model_inference_results_path to model_inference_results_directory and remove duplicate comment ([570e56d](https://github.com/kubeflow/pipelines/commit/570e56dd09af32e173cf041eed7497e4533ec186)) +* **frontend:** Replace twitter artifactory endpoint with npm endpoint. ([\#10099](https://github.com/kubeflow/pipelines/issues/10099)) ([da6a360](https://github.com/kubeflow/pipelines/commit/da6a3601468282c0592eae8e89a3d97b982e2d43)) +* **sdk:** fix bug when `dsl.importer` argument is provided by loop variable ([\#10116](https://github.com/kubeflow/pipelines/issues/10116)) ([73d51c8](https://github.com/kubeflow/pipelines/commit/73d51c8a23afad97efb6d7e7436c081fa22ce24d)) +* **sdk:** Fix OOB for IPython and refactor. Closes [\#10075](https://github.com/kubeflow/pipelines/issues/10075). 
([\#10094](https://github.com/kubeflow/pipelines/issues/10094)) ([c903271](https://github.com/kubeflow/pipelines/commit/c9032716ab2013df56cb1078a703d48ed8e36fb4)) +* **sdk:** type annotation for client credentials ([\#10158](https://github.com/kubeflow/pipelines/issues/10158)) ([02e00e8](https://github.com/kubeflow/pipelines/commit/02e00e8439e9753dbf82856ac9c5a7cec8ce3243)) + + +### Other Pull Requests + +* feat(components) Extend kserve component ([\#10136](https://github.com/kubeflow/pipelines/issues/10136)) ([2054b7c](https://github.com/kubeflow/pipelines/commit/2054b7c45d4831c787115563c8be0048abcb9be1)) +* No public description ([0e240db](https://github.com/kubeflow/pipelines/commit/0e240db39799cb0afbd8c7f982ffdd4f9eb58121)) + ### [2.0.2](https://github.com/kubeflow/pipelines/compare/2.0.0...2.0.2) (2023-10-11) diff --git a/VERSION b/VERSION index f93ea0ca333..6acdb442890 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.2 \ No newline at end of file +2.0.3 \ No newline at end of file diff --git a/backend/api/v1beta1/python_http_client/README.md b/backend/api/v1beta1/python_http_client/README.md index 12742f284f6..1de30811eea 100644 --- a/backend/api/v1beta1/python_http_client/README.md +++ b/backend/api/v1beta1/python_http_client/README.md @@ -3,8 +3,8 @@ This file contains REST API specification for Kubeflow Pipelines. The file is au This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project: -- API version: 2.0.2 -- Package version: 2.0.2 +- API version: 2.0.3 +- Package version: 2.0.3 - Build package: org.openapitools.codegen.languages.PythonClientCodegen For more information, please visit [https://www.google.com](https://www.google.com) diff --git a/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py b/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py index 8d3f7b1a350..86713cb5811 100644 --- a/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py +++ b/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py @@ -14,7 +14,7 @@ from __future__ import absolute_import -__version__ = "2.0.2" +__version__ = "2.0.3" # import apis into sdk package from kfp_server_api.api.experiment_service_api import ExperimentServiceApi diff --git a/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py b/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py index e5afaf6b984..8a2be9ffd90 100644 --- a/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py +++ b/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py @@ -78,7 +78,7 @@ def __init__(self, configuration=None, header_name=None, header_value=None, self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. 
- self.user_agent = 'OpenAPI-Generator/2.0.2/python' + self.user_agent = 'OpenAPI-Generator/2.0.3/python' self.client_side_validation = configuration.client_side_validation def __enter__(self): diff --git a/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py b/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py index 578dcda2dcf..fb157f50255 100644 --- a/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py +++ b/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py @@ -351,8 +351,8 @@ def to_debug_report(self): return "Python SDK Debug Report:\n"\ "OS: {env}\n"\ "Python Version: {pyversion}\n"\ - "Version of the API: 2.0.2\n"\ - "SDK Package Version: 2.0.2".\ + "Version of the API: 2.0.3\n"\ + "SDK Package Version: 2.0.3".\ format(env=sys.platform, pyversion=sys.version) def get_host_settings(self): diff --git a/backend/api/v1beta1/python_http_client/setup.py b/backend/api/v1beta1/python_http_client/setup.py index aa45f1e52d8..9c9464f5aeb 100644 --- a/backend/api/v1beta1/python_http_client/setup.py +++ b/backend/api/v1beta1/python_http_client/setup.py @@ -13,7 +13,7 @@ from setuptools import setup, find_packages # noqa: H301 NAME = "kfp-server-api" -VERSION = "2.0.2" +VERSION = "2.0.3" # To install the library, run the following # # python setup.py install diff --git a/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json b/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json index 233d7a0e887..9414f4cdcd6 100644 --- a/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json +++ b/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json @@ -2,7 +2,7 @@ "swagger": "2.0", "info": { "title": "Kubeflow Pipelines API", - "version": "2.0.2", + "version": "2.0.3", "description": "This file contains REST API specification for Kubeflow Pipelines. The file is autogenerated from the swagger definition.", "contact": { "name": "google", diff --git a/backend/api/v2beta1/python_http_client/README.md b/backend/api/v2beta1/python_http_client/README.md index 7b2ec51e9e6..be20533cf9f 100644 --- a/backend/api/v2beta1/python_http_client/README.md +++ b/backend/api/v2beta1/python_http_client/README.md @@ -3,8 +3,8 @@ This file contains REST API specification for Kubeflow Pipelines. 
The file is au This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project: -- API version: 2.0.2 -- Package version: 2.0.2 +- API version: 2.0.3 +- Package version: 2.0.3 - Build package: org.openapitools.codegen.languages.PythonClientCodegen For more information, please visit [https://www.google.com](https://www.google.com) diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py b/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py index 3f33d9f4fac..f7a521107db 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py @@ -14,7 +14,7 @@ from __future__ import absolute_import -__version__ = "2.0.2" +__version__ = "2.0.3" # import apis into sdk package from kfp_server_api.api.auth_service_api import AuthServiceApi diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py b/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py index e5afaf6b984..8a2be9ffd90 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py @@ -78,7 +78,7 @@ def __init__(self, configuration=None, header_name=None, header_value=None, self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. - self.user_agent = 'OpenAPI-Generator/2.0.2/python' + self.user_agent = 'OpenAPI-Generator/2.0.3/python' self.client_side_validation = configuration.client_side_validation def __enter__(self): diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py b/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py index 578dcda2dcf..fb157f50255 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py @@ -351,8 +351,8 @@ def to_debug_report(self): return "Python SDK Debug Report:\n"\ "OS: {env}\n"\ "Python Version: {pyversion}\n"\ - "Version of the API: 2.0.2\n"\ - "SDK Package Version: 2.0.2".\ + "Version of the API: 2.0.3\n"\ + "SDK Package Version: 2.0.3".\ format(env=sys.platform, pyversion=sys.version) def get_host_settings(self): diff --git a/backend/api/v2beta1/python_http_client/setup.py b/backend/api/v2beta1/python_http_client/setup.py index aa45f1e52d8..9c9464f5aeb 100644 --- a/backend/api/v2beta1/python_http_client/setup.py +++ b/backend/api/v2beta1/python_http_client/setup.py @@ -13,7 +13,7 @@ from setuptools import setup, find_packages # noqa: H301 NAME = "kfp-server-api" -VERSION = "2.0.2" +VERSION = "2.0.3" # To install the library, run the following # # python setup.py install diff --git a/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json b/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json index 60d0004a14a..2a63b01dd63 100644 --- a/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json +++ b/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json @@ -2,7 +2,7 @@ "swagger": "2.0", "info": { "title": "Kubeflow Pipelines API", - "version": "2.0.2", + "version": "2.0.3", "description": "This file contains REST API specification for Kubeflow Pipelines. 
The file is autogenerated from the swagger definition.", "contact": { "name": "google", diff --git a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml index 77728a31db3..a563a4844fc 100644 --- a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml +++ b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml @@ -12,7 +12,7 @@ metadata: spec: descriptor: type: Kubeflow Pipelines - version: 2.0.2 + version: 2.0.3 description: |- Reusable end-to-end ML workflow maintainers: diff --git a/manifests/gcp_marketplace/schema.yaml b/manifests/gcp_marketplace/schema.yaml index bc2c3fda7ff..fa50b0207d7 100644 --- a/manifests/gcp_marketplace/schema.yaml +++ b/manifests/gcp_marketplace/schema.yaml @@ -1,9 +1,9 @@ x-google-marketplace: schemaVersion: v2 applicationApiVersion: v1beta1 - publishedVersion: 2.0.2 + publishedVersion: 2.0.3 publishedVersionMetadata: - releaseNote: Based on 2.0.2 version. + releaseNote: Based on 2.0.3 version. releaseTypes: - Feature recommended: false diff --git a/manifests/kustomize/base/cache-deployer/kustomization.yaml b/manifests/kustomize/base/cache-deployer/kustomization.yaml index 1e82e5ef346..de44a30c52f 100644 --- a/manifests/kustomize/base/cache-deployer/kustomization.yaml +++ b/manifests/kustomize/base/cache-deployer/kustomization.yaml @@ -8,4 +8,4 @@ commonLabels: app: cache-deployer images: - name: gcr.io/ml-pipeline/cache-deployer - newTag: 2.0.2 + newTag: 2.0.3 diff --git a/manifests/kustomize/base/cache/kustomization.yaml b/manifests/kustomize/base/cache/kustomization.yaml index 2f2ca2f4b45..24fa04023ab 100644 --- a/manifests/kustomize/base/cache/kustomization.yaml +++ b/manifests/kustomize/base/cache/kustomization.yaml @@ -10,4 +10,4 @@ commonLabels: app: cache-server images: - name: gcr.io/ml-pipeline/cache-server - newTag: 2.0.2 + newTag: 2.0.3 diff --git a/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml b/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml index b8cfddd1ad2..cd3b48e8ee7 100644 --- a/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml +++ b/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml @@ -11,7 +11,7 @@ data: until the changes take effect. A quick way to restart all deployments in a namespace: `kubectl rollout restart deployment -n `. 
appName: pipeline - appVersion: 2.0.2 + appVersion: 2.0.3 dbHost: mysql # relic to be removed after release dbPort: "3306" # relic to be removed after release dbType: mysql diff --git a/manifests/kustomize/base/metadata/base/kustomization.yaml b/manifests/kustomize/base/metadata/base/kustomization.yaml index b25f43b46a5..1988d30e0d6 100644 --- a/manifests/kustomize/base/metadata/base/kustomization.yaml +++ b/manifests/kustomize/base/metadata/base/kustomization.yaml @@ -9,4 +9,4 @@ resources: - metadata-grpc-sa.yaml images: - name: gcr.io/ml-pipeline/metadata-envoy - newTag: 2.0.2 + newTag: 2.0.3 diff --git a/manifests/kustomize/base/pipeline/kustomization.yaml b/manifests/kustomize/base/pipeline/kustomization.yaml index 492b72b00dd..90b2d713c94 100644 --- a/manifests/kustomize/base/pipeline/kustomization.yaml +++ b/manifests/kustomize/base/pipeline/kustomization.yaml @@ -37,14 +37,14 @@ resources: - kfp-launcher-configmap.yaml images: - name: gcr.io/ml-pipeline/api-server - newTag: 2.0.2 + newTag: 2.0.3 - name: gcr.io/ml-pipeline/persistenceagent - newTag: 2.0.2 + newTag: 2.0.3 - name: gcr.io/ml-pipeline/scheduledworkflow - newTag: 2.0.2 + newTag: 2.0.3 - name: gcr.io/ml-pipeline/frontend - newTag: 2.0.2 + newTag: 2.0.3 - name: gcr.io/ml-pipeline/viewer-crd-controller - newTag: 2.0.2 + newTag: 2.0.3 - name: gcr.io/ml-pipeline/visualization-server - newTag: 2.0.2 + newTag: 2.0.3 diff --git a/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml b/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml index f27ba776894..b503511088b 100644 --- a/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml +++ b/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml @@ -7,4 +7,4 @@ resources: - metadata-writer-sa.yaml images: - name: gcr.io/ml-pipeline/metadata-writer - newTag: 2.0.2 + newTag: 2.0.3 diff --git a/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml b/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml index 064b1951827..827f961978b 100644 --- a/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml +++ b/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: gcr.io/ml-pipeline/inverse-proxy-agent - newTag: 2.0.2 + newTag: 2.0.3 resources: - proxy-configmap.yaml - proxy-deployment.yaml From f250700e4d6828257fe7ea5701c36001d53b7de2 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 27 Oct 2023 12:04:10 -0500 Subject: [PATCH 20/25] chore(sdk): release KFP SDK 2.4.0 (#10167) --- docs/conf.py | 9 ++++++++- sdk/RELEASE.md | 15 ++++++++++++++- sdk/python/kfp/__init__.py | 2 +- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 253fae4000f..38925518344 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -132,12 +132,19 @@ True, 'version_info': [ # need to use the sdk- prefix to avoid conflict with the BE's GitHub release tags + { + 'version': + 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.4.0/', + 'title': + '2.4.0', + 'aliases': ['stable'], + }, { 'version': 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.3.0/', 'title': '2.3.0', - 'aliases': ['stable'], + 'aliases': [], }, { 'version': diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index 61dfc86d14f..4ee8405ce4c 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -3,21 +3,34 @@ ## Features ## Breaking changes + +## Deprecations + +## Bug fixes and other changes + +## Documentation updates + +# 2.4.0 + 
+## Features +* Add support for a Pythonic artifact authoring style [\#9932](https://github.com/kubeflow/pipelines/pull/9932) * Support collecting outputs from conditional branches using `dsl.OneOf` [\#10067](https://github.com/kubeflow/pipelines/pull/10067) +## Breaking changes + ## Deprecations * Add notice of Python 3.7 support removal on April 23, 2024 [\#10139](https://github.com/kubeflow/pipelines/pull/10139) ## Bug fixes and other changes * Fix type on `dsl.ParallelFor` sub-DAG output when a `dsl.Collected` is used. Non-functional fix. [\#10069](https://github.com/kubeflow/pipelines/pull/10069) * Fix bug when `dsl.importer` argument is provided by a `dsl.ParallelFor` loop variable. [\#10116](https://github.com/kubeflow/pipelines/pull/10116) +* Fix client authentication in notebook and iPython environments [\#10094](https://github.com/kubeflow/pipelines/pull/10094) ## Documentation updates # 2.3.0 ## Features * Support `PipelineTaskFinalStatus` in tasks that use `.ignore_upstream_failure()` [\#10010](https://github.com/kubeflow/pipelines/pull/10010) -* Add support for a Pythonic artifact authoring style [\#9932](https://github.com/kubeflow/pipelines/pull/9932) ## Breaking changes diff --git a/sdk/python/kfp/__init__.py b/sdk/python/kfp/__init__.py index eb1fce1d7ef..72f210e1eae 100644 --- a/sdk/python/kfp/__init__.py +++ b/sdk/python/kfp/__init__.py @@ -16,7 +16,7 @@ # https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages __path__ = __import__('pkgutil').extend_path(__path__, __name__) -__version__ = '2.3.0' +__version__ = '2.4.0' import sys import warnings From 9a1447aa04cd86d9182a5c06e5991bf570bc591d Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 27 Oct 2023 15:07:11 -0500 Subject: [PATCH 21/25] docs(sdk): fix readthedocs build error (#10170) --- .readthedocs.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.readthedocs.yml b/.readthedocs.yml index 729a1c1ae21..0944a5a1871 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -6,3 +6,7 @@ python: version: 3.7 install: - requirements: docs/requirements.txt +build: + os: ubuntu-22.04 + tools: + python: "3.7" \ No newline at end of file From 1ba6d5f1c402158966d7fdc552b99c0ffca2dfa8 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 27 Oct 2023 16:10:10 -0500 Subject: [PATCH 22/25] docs(sdk): fix readthedocs build error, additional changes (#10173) --- .readthedocs.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 0944a5a1871..9c5ec8fafda 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -3,7 +3,6 @@ version: 2 sphinx: configuration: docs/conf.py python: - version: 3.7 install: - requirements: docs/requirements.txt build: From 0268ca9a843035c114b73e5e9725c7c7b3cf0b98 Mon Sep 17 00:00:00 2001 From: Magdalena Kuhn Date: Sun, 29 Oct 2023 14:49:17 +0100 Subject: [PATCH 23/25] adjust component.yaml and source code to add transformer to ifsvc --- components/kserve/component.yaml | 94 ++++---- components/kserve/src/kservedeployer.py | 285 +++++++++++++++++------- 2 files changed, 257 insertions(+), 122 deletions(-) diff --git a/components/kserve/component.yaml b/components/kserve/component.yaml index 9d7b97e3e23..04dd801d33c 100644 --- a/components/kserve/component.yaml +++ b/components/kserve/component.yaml @@ -1,25 +1,33 @@ name: Serve a model with KServe description: Serve Models using KServe inputs: - - {name: Action, type: String, default: 'create', description: 'Action to execute on KServe'} - - {name: Model Name, 
type: String, default: '', description: 'Name to give to the deployed model'} - - {name: Model URI, type: String, default: '', description: 'Path of the S3 or GCS compatible directory containing the model.'} - - {name: Canary Traffic Percent, type: String, default: '100', description: 'The traffic split percentage between the candidate model and the last ready model'} - - {name: Namespace, type: String, default: '', description: 'Kubernetes namespace where the KServe service is deployed.'} - - {name: Framework, type: String, default: '', description: 'Machine Learning Framework for Model Serving.'} - - {name: Runtime Version, type: String, default: 'latest', description: 'Runtime Version of Machine Learning Framework'} - - {name: Resource Requests, type: String, default: '{"cpu": "0.5", "memory": "512Mi"}', description: 'CPU and Memory requests for Model Serving'} - - {name: Resource Limits, type: String, default: '{"cpu": "1", "memory": "1Gi"}', description: 'CPU and Memory limits for Model Serving'} - - {name: Custom Model Spec, type: String, default: '{}', description: 'Custom model runtime container spec in JSON'} - - {name: Autoscaling Target, type: String, default: '0', description: 'Autoscaling Target Number'} - - {name: Service Account, type: String, default: '', description: 'ServiceAccount to use to run the InferenceService pod'} - - {name: Enable Istio Sidecar, type: Bool, default: 'True', description: 'Whether to enable istio sidecar injection'} - - {name: InferenceService YAML, type: String, default: '{}', description: 'Raw InferenceService serialized YAML for deployment'} - - {name: Watch Timeout, type: String, default: '300', description: "Timeout seconds for watching until InferenceService becomes ready."} - - {name: Min Replicas, type: String, default: '-1', description: 'Minimum number of InferenceService replicas'} - - {name: Max Replicas, type: String, default: '-1', description: 'Maximum number of InferenceService replicas'} - - {name: Request Timeout, type: String, default: '60', description: "Specifies the number of seconds to wait before timing out a request to the component."} - - {name: Enable ISVC Status, type: Bool, default: 'True', description: "Specifies whether to store the inference service status as the output parameter"} + - {name: Action, type: String, default: 'create', description: 'Action to execute on KServe'} + - {name: Model Name, type: String, default: '', description: 'Name to give to the deployed InferenceService'} + - {name: Namespace, type: String, default: '', description: 'Kubernetes namespace where the InferenceService is deployed'} + - {name: Autoscaling Target, type: String, default: '0', description: 'Autoscaling Target Number'} + - {name: Enable Istio Sidecar, type: Bool, default: 'True', description: 'Whether to enable istio sidecar injection'} + - {name: Enable ISVC Status, type: Bool, default: 'True', description: "Specifies whether to store the inference service status as the output parameter"} + - {name: InferenceService YAML, type: String, default: '{}', description: 'Raw InferenceService serialized YAML for deployment'} + - {name: Watch Timeout, type: String, default: '300', description: "Timeout seconds for watching until InferenceService becomes ready."} + - {name: Service Account, type: String, default: '', description: 'K8s ServiceAccount to use to run the Predictor and Transformer pod'} + - {name: Pred Min Replicas, type: String, default: '-1', description: 'Minimum number of Predictor replicas'} + - {name: Pred Max Replicas, 
type: String, default: '-1', description: 'Maximum number of Predictor replicas'} + - {name: Pred Model URI, type: String, default: '', description: 'Path of the S3, GCS or ABS compatible directory containing the Predictor model'} + - {name: Pred Canary Traffic Percent, type: String, default: '100', description: 'The traffic split percentage between the candidate model and the last ready model'} + - {name: Pred Framework, type: String, default: '', description: 'Machine Learning Framework for the Predictor'} + - {name: Pred Runtime Version, type: String, default: 'latest', description: 'Runtime Version of Machine Learning Framework'} + - {name: Pred Resource Requests, type: String, default: '{"cpu": "0.5", "memory": "512Mi"}', description: 'CPU and Memory requests for the Predictor'} + - {name: Pred Resource Limits, type: String, default: '{"cpu": "1", "memory": "1Gi"}', description: 'CPU and Memory limits for the Predictor'} + - {name: Pred Request Timeout, type: String, default: '60', description: 'Specifies the number of seconds to wait before timing out a request to the Predictor.'} + - {name: Pred Custom Model Spec, type: String, default: '{}', description: 'Custom pod container spec in JSON to be used in the Predictor pod'} + - {name: Transf Min Replicas, type: String, default: '-1', description: 'Minimum number of Transformer replicas'} + - {name: Transf Max Replicas, type: String, default: '-1', description: 'Maximum number of Transformer replicas'} + - {name: Transf Image, type: String, default: '', description: 'Docker image used for the Transformer pod container'} + - {name: Transf Args, type: String, default: '[]', description: 'Arguments to the entrypoint of the Transformer pod container, overwrites CMD'} + - {name: Transf URI, type: String, default: '', description: 'Path of the S3, GCS or ABS compatible directory containing the Transformer. 
Not necessary if the whole pre-/postprocessing logic is in the docker image'}
+  - {name: Transf Resource Requests, type: String, default: '{"cpu": "0.5", "memory": "512Mi"}', description: 'CPU and Memory requests for the Transformer'}
+  - {name: Transf Resource Limits, type: String, default: '{"cpu": "1", "memory": "1Gi"}', description: 'CPU and Memory limits for the Transformer'}
+  - {name: Transf Request Timeout, type: String, default: '60', description: 'Specifies the number of seconds to wait before timing out a request to the Transformer'}
 outputs:
   - {name: InferenceService Status, type: String, description: 'Status JSON output of InferenceService'}
@@ -29,24 +37,32 @@ implementation:
     command: ['python']
     args: [
       -u, kservedeployer.py,
-      --action, {inputValue: Action},
-      --model-name, {inputValue: Model Name},
-      --model-uri, {inputValue: Model URI},
-      --canary-traffic-percent, {inputValue: Canary Traffic Percent},
-      --namespace, {inputValue: Namespace},
-      --framework, {inputValue: Framework},
-      --runtime-version, {inputValue: Runtime Version},
-      --resource-requests, {inputValue: Resource Requests},
-      --resource-limits, {inputValue: Resource Limits},
-      --custom-model-spec, {inputValue: Custom Model Spec},
-      --autoscaling-target, {inputValue: Autoscaling Target},
-      --service-account, {inputValue: Service Account},
-      --enable-istio-sidecar, {inputValue: Enable Istio Sidecar},
-      --output-path, {outputPath: InferenceService Status},
-      --inferenceservice-yaml, {inputValue: InferenceService YAML},
-      --watch-timeout, {inputValue: Watch Timeout},
-      --min-replicas, {inputValue: Min Replicas},
-      --max-replicas, {inputValue: Max Replicas},
-      --request-timeout, {inputValue: Request Timeout},
-      --enable-isvc-status, {inputValue: Enable ISVC Status}
+      --action, {inputValue: Action},
+      --model-name, {inputValue: Model Name},
+      --namespace, {inputValue: Namespace},
+      --autoscaling-target, {inputValue: Autoscaling Target},
+      --enable-istio-sidecar, {inputValue: Enable Istio Sidecar},
+      --enable-isvc-status, {inputValue: Enable ISVC Status},
+      --inferenceservice-yaml, {inputValue: InferenceService YAML},
+      --watch-timeout, {inputValue: Watch Timeout},
+      --service-account, {inputValue: Service Account},
+      --pred-min-replicas, {inputValue: Pred Min Replicas},
+      --pred-max-replicas, {inputValue: Pred Max Replicas},
+      --pred-model-uri, {inputValue: Pred Model URI},
+      --pred-canary-traffic-percent, {inputValue: Pred Canary Traffic Percent},
+      --pred-framework, {inputValue: Pred Framework},
+      --pred-runtime-version, {inputValue: Pred Runtime Version},
+      --pred-resource-requests, {inputValue: Pred Resource Requests},
+      --pred-resource-limits, {inputValue: Pred Resource Limits},
+      --pred-request-timeout, {inputValue: Pred Request Timeout},
+      --pred-custom-model-spec, {inputValue: Pred Custom Model Spec},
+      --transf-min-replicas, {inputValue: Transf Min Replicas},
+      --transf-max-replicas, {inputValue: Transf Max Replicas},
+      --transf-uri, {inputValue: Transf URI},
+      --transf-image, {inputValue: Transf Image},
+      --transf-args, {inputValue: Transf Args},
+      --transf-resource-requests, {inputValue: Transf Resource Requests},
+      --transf-resource-limits, {inputValue: Transf Resource Limits},
+      --transf-request-timeout, {inputValue: Transf Request Timeout},
+      --output-path, {outputPath: InferenceService Status},
   ]
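Before the source-code half of this patch, it is worth seeing the reworked interface from the caller's side. The sketch below shows how a pipeline might consume the updated component; the component path, the input values, and the sanitized keyword-argument names are illustrative assumptions, not part of the patch.

```python
# Sketch: using the reworked KServe component from a KFP pipeline.
# Assumes kfp is installed and component.yaml is available locally.
from kfp import components, dsl

kserve_op = components.load_component_from_file('components/kserve/component.yaml')

@dsl.pipeline(name='kserve-deploy-with-transformer')
def deploy_with_transformer():
    # KFP derives argument names from the input names ("Pred Model URI" ->
    # pred_model_uri); unlisted inputs fall back to their declared defaults.
    kserve_op(
        action='create',
        model_name='demo-model',                       # hypothetical
        namespace='kserve-test',                       # hypothetical
        pred_model_uri='gs://some-bucket/model',       # hypothetical
        pred_framework='sklearn',
        transf_image='docker.io/example/transformer',  # hypothetical
        transf_min_replicas='1',
        transf_max_replicas='2',
    )
```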
diff --git a/components/kserve/src/kservedeployer.py b/components/kserve/src/kservedeployer.py
index c8799332f76..9026b39ca25 100644
--- a/components/kserve/src/kservedeployer.py
+++ b/components/kserve/src/kservedeployer.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import argparse
+import ast
 from distutils.util import strtobool
 import json
 import os
@@ -21,6 +22,8 @@
 import yaml
 
 from kubernetes import client
+from kubernetes.client import V1Container
+from kubernetes.client import V1EnvVar
 from kubernetes.client.models import V1ResourceRequirements
 
 from kserve import constants
@@ -36,6 +39,7 @@
 from kserve import V1beta1TorchServeSpec
 from kserve import V1beta1TritonSpec
 from kserve import V1beta1XGBoostSpec
+from kserve import V1beta1TransformerSpec
 from kserve.api.watch import isvc_watch
 
 
@@ -95,6 +99,34 @@ def create_predictor_spec(framework, runtime_version, resource_requests, resourc
     return predictor_spec
 
 
+def create_transformer_spec(resource_requests, resource_limits, docker_image,
+                            image_args, storage_uri, service_account, min_replicas,
+                            max_replicas, request_timeout):
+    """
+    Create and return V1beta1TransformerSpec to be used in a V1beta1InferenceServiceSpec
+    object.
+    """
+    if docker_image:
+        return V1beta1TransformerSpec(
+            min_replicas=(min_replicas if min_replicas >= 0 else None),
+            max_replicas=(max_replicas if max_replicas > 0 and max_replicas >= min_replicas else None),
+            service_account_name=service_account,
+            timeout=request_timeout,
+            containers=[V1Container(
+                name="kserve-transformer",
+                image=docker_image,
+                args=(image_args if image_args else None),
+                env=([V1EnvVar(name="STORAGE_URI", value=storage_uri)] if storage_uri else None),
+                resources=V1ResourceRequirements(
+                    requests=resource_requests,
+                    limits=resource_limits
+                )
+            )]
+        )
+    else:
+        return None
+
+
 def create_custom_container_spec(custom_model_spec):
     """
     Given a JSON container spec, return a V1Container object
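To make the new helper concrete, a hypothetical call could look like this; all values are invented, and the -1 sentinels mirror the component defaults (they collapse to None so KServe applies its own replica defaults):

# Illustration only: exercises the create_transformer_spec() helper added above.
transformer_spec = create_transformer_spec(
    resource_requests={"cpu": "0.5", "memory": "512Mi"},
    resource_limits={"cpu": "1", "memory": "1Gi"},
    docker_image="docker.io/example/my-transformer:latest",  # hypothetical image
    image_args=["--model_name", "my-model"],
    storage_uri=None,   # pre-/postprocessing logic baked into the image
    service_account="",
    min_replicas=-1,    # negative sentinel -> None -> KServe default
    max_replicas=-1,    # non-positive sentinel -> None -> KServe default
    request_timeout=60,
)
# Returns None when docker_image is empty, in which case the InferenceService
# is created without a transformer component at all.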
@@ -146,7 +178,7 @@ def create_custom_container_spec(custom_model_spec):
     )
 
 
-def create_inference_service(metadata, predictor_spec):
+def create_inference_service(metadata, transformer_spec, predictor_spec):
     """
     Build and return V1beta1InferenceService object.
     """
@@ -155,6 +187,7 @@
         kind=constants.KSERVE_KIND,
         metadata=metadata,
         spec=V1beta1InferenceServiceSpec(
+            transformer=transformer_spec,
             predictor=predictor_spec
         ),
     )
@@ -187,10 +220,14 @@ def submit_api_request(kserve_client, action, name, isvc, namespace=None,
     return outputs
 
 
-def perform_action(action, model_name, model_uri, canary_traffic_percent, namespace, framework,
-                   runtime_version, resource_requests, resource_limits, custom_model_spec,
-                   service_account, inferenceservice_yaml, request_timeout, autoscaling_target=0,
-                   enable_istio_sidecar=True, watch_timeout=300, min_replicas=0, max_replicas=0):
+def perform_action(action, model_name, namespace, pred_model_uri, pred_canary_traffic_percent,
+                   pred_framework, pred_runtime_version, pred_resource_requests,
+                   pred_resource_limits, pred_custom_model_spec, service_account,
+                   inferenceservice_yaml, pred_request_timeout, transf_resource_requests,
+                   transf_uri, transf_request_timeout, transf_resource_limits, transf_image,
+                   transf_args, autoscaling_target=0, enable_istio_sidecar=True,
+                   watch_timeout=300, pred_min_replicas=0, pred_max_replicas=0,
+                   transf_min_replicas=0, transf_max_replicas=0):
     """
     Perform the specified action. If the action is not 'delete' and
     `inferenceservice_yaml` was provided, the dict representation of the YAML
     will be sent directly to the
@@ -227,18 +264,24 @@ def perform_action(action, model_name, model_uri, canary_traffic_percent, namesp
     # If a custom model container spec was provided, build the V1Container
     # object using it.
-    containers = []
-    if custom_model_spec:
-        containers = [create_custom_container_spec(custom_model_spec)]
+    pred_containers = []
+    if pred_custom_model_spec:
+        pred_containers = [create_custom_container_spec(pred_custom_model_spec)]
 
-    # Build the V1beta1PredictorSpec.
+    # Build the V1beta1PredictorSpec and V1beta1TransformerSpec
     predictor_spec = create_predictor_spec(
-        framework, runtime_version, resource_requests, resource_limits,
-        model_uri, canary_traffic_percent, service_account, min_replicas,
-        max_replicas, containers, request_timeout
+        pred_framework, pred_runtime_version, pred_resource_requests, pred_resource_limits,
+        pred_model_uri, pred_canary_traffic_percent, service_account, pred_min_replicas,
+        pred_max_replicas, pred_containers, pred_request_timeout
     )
 
-    isvc = create_inference_service(metadata, predictor_spec)
+    transformer_spec = create_transformer_spec(
+        transf_resource_requests, transf_resource_limits, transf_image, transf_args,
+        transf_uri, service_account, transf_min_replicas, transf_max_replicas,
+        transf_request_timeout
+    )
+
+    isvc = create_inference_service(metadata, transformer_spec, predictor_spec)
 
     if action == "create":
         submit_api_request(kserve_client, 'create', model_name, isvc, namespace,
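The net effect inside perform_action: the InferenceService now carries an optional transformer next to the predictor. A sketch of the assembly, with hypothetical metadata values:

# transformer_spec may be None, which yields a predictor-only service.
isvc = create_inference_service(
    metadata=client.V1ObjectMeta(name="my-model", namespace="my-namespace"),  # hypothetical
    transformer_spec=transformer_spec,
    predictor_spec=predictor_spec,
)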
model", + ) + parser.add_argument( + "--pred-canary-traffic-percent", + type=str, + help="The traffic split percentage between the candidate model and the last ready model", + default="100", + ) parser.add_argument( - "--framework", + "--pred-framework", type=str, - help="Model serving framework to use. Available frameworks: " + + help="Model serving framework to use for the Predictor. Available frameworks: " + str(list(AVAILABLE_FRAMEWORKS.keys())), default="" ) parser.add_argument( - "--runtime-version", + "--pred-runtime-version", type=str, help="Runtime Version of Machine Learning Framework", default="latest" ) parser.add_argument( - "--resource-requests", + "--pred-resource-requests", type=json.loads, - help="CPU and Memory requests for Model Serving", + help="CPU and Memory requests for the Predictor", default='{"cpu": "0.5", "memory": "512Mi"}', ) parser.add_argument( - "--resource-limits", + "--pred-resource-limits", type=json.loads, - help="CPU and Memory limits for Model Serving", + help="CPU and Memory limits for the Predictor", default='{"cpu": "1", "memory": "1Gi"}', ) parser.add_argument( - "--custom-model-spec", + "--pred-request-timeout", + type=str, + help="Specifies the number of seconds to wait before timing out a request to the Predictor", + default="60" + ) + parser.add_argument( + "--pred-custom-model-spec", type=json.loads, - help="The container spec for a custom model runtime", + help="The container spec for a custom Predictor runtime", default="{}", ) + parser.add_argument( - "--autoscaling-target", type=str, help="Autoscaling target number", default="0" + "--transf-min-replicas", + type=str, + help="Minimum number of Transformer replicas", + default="-1" ) parser.add_argument( - "--service-account", + "--transf-max-replicas", + type=str, + help="Maximum number of Transformer replicas", + default="-1" + ) + parser.add_argument( + "--transf-image", type=str, - help="Service account containing s3 credentials", - default="", + help="Docker image used for the Transformer pod container", ) parser.add_argument( - "--enable-istio-sidecar", - type=strtobool, - help="Whether to inject istio sidecar", - default="True" + "--transf-args", + type=str, + help="Arguments to the entrypoint of the Transformer pod container, overwrites CMD", ) parser.add_argument( - "--inferenceservice-yaml", - type=yaml.safe_load, - help="Raw InferenceService serialized YAML for deployment", - default="{}" + "--transf-uri", + type=str, + help="Path of the S3, GCS or ABS compatible directory containing the Transformer. 
     parser.add_argument(
-        "--inferenceservice-yaml",
-        type=yaml.safe_load,
-        help="Raw InferenceService serialized YAML for deployment",
-        default="{}"
+        "--transf-uri",
+        type=str,
+        help="Path of the S3, GCS or ABS compatible directory containing the Transformer. Not necessary if the whole pre-/postprocessing logic is in the docker image",
     )
-    parser.add_argument("--output-path", type=str, help="Path to store URI output")
-    parser.add_argument("--watch-timeout",
-                        type=str,
-                        help="Timeout seconds for watching until InferenceService becomes ready.",
-                        default="300")
     parser.add_argument(
-        "--min-replicas", type=str, help="Minimum number of replicas", default="-1"
+        "--transf-resource-requests",
+        type=json.loads,
+        help="CPU and Memory requests for the Transformer",
+        default='{"cpu": "0.5", "memory": "512Mi"}',
     )
     parser.add_argument(
-        "--max-replicas", type=str, help="Maximum number of replicas", default="-1"
+        "--transf-resource-limits",
+        type=json.loads,
+        help="CPU and Memory limits for the Transformer",
+        default='{"cpu": "1", "memory": "1Gi"}',
+    )
+    parser.add_argument(
+        "--transf-request-timeout",
+        type=str,
+        help="Specifies the number of seconds to wait before timing out a request to the Transformer",
+        default="60"
     )
-    parser.add_argument("--request-timeout",
-                        type=str,
-                        help="Specifies the number of seconds to wait before timing out a request to the component.",
-                        default="60")
-    parser.add_argument("--enable-isvc-status",
-                        type=strtobool,
-                        help="Specifies whether to store the inference service status as the output parameter",
-                        default="True")
+
+    parser.add_argument("--output-path", type=str, help="Path to store URI output")
 
     args = parser.parse_args()
 
     action = args.action.lower()
     model_name = args.model_name
-    model_uri = args.model_uri
-    canary_traffic_percent = int(args.canary_traffic_percent)
     namespace = args.namespace
-    framework = args.framework.lower()
-    runtime_version = args.runtime_version.lower()
-    resource_requests = args.resource_requests
-    resource_limits = args.resource_limits
-    output_path = args.output_path
-    custom_model_spec = args.custom_model_spec
     autoscaling_target = int(args.autoscaling_target)
-    service_account = args.service_account
     enable_istio_sidecar = args.enable_istio_sidecar
+    enable_isvc_status = args.enable_isvc_status
     inferenceservice_yaml = args.inferenceservice_yaml
     watch_timeout = int(args.watch_timeout)
-    min_replicas = int(args.min_replicas)
-    max_replicas = int(args.max_replicas)
-    request_timeout = int(args.request_timeout)
-    enable_isvc_status = args.enable_isvc_status
+    service_account = args.service_account
+    pred_min_replicas = int(args.pred_min_replicas)
+    pred_max_replicas = int(args.pred_max_replicas)
+    pred_model_uri = args.pred_model_uri
+    pred_canary_traffic_percent = int(args.pred_canary_traffic_percent)
+    pred_framework = args.pred_framework.lower()
+    pred_runtime_version = args.pred_runtime_version.lower()
+    pred_resource_requests = args.pred_resource_requests
+    pred_resource_limits = args.pred_resource_limits
+    pred_request_timeout = int(args.pred_request_timeout)
+    pred_custom_model_spec = args.pred_custom_model_spec
+    transf_min_replicas = int(args.transf_min_replicas)
+    transf_max_replicas = int(args.transf_max_replicas)
+    transf_image = args.transf_image
+    transf_args = ast.literal_eval(args.transf_args)
+    transf_uri = args.transf_uri
+    transf_resource_requests = args.transf_resource_requests
+    transf_resource_limits = args.transf_resource_limits
+    transf_request_timeout = int(args.transf_request_timeout)
+    output_path = args.output_path
 
     # Default the namespace.
     if not namespace:
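One detail worth flagging in the block above: unlike the JSON-typed flags, --transf-args is parsed with ast.literal_eval, so its value must be a Python-style list literal (the component default '[]' qualifies). A quick illustration with hypothetical values:

import ast

raw = '["--model_name", "my-model", "--predictor_host", "my-model-predictor"]'
assert ast.literal_eval(raw) == [
    "--model_name", "my-model", "--predictor_host", "my-model-predictor"
]
# A bare string such as "--model_name my-model" would raise an error instead.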
@@ -400,30 +511,38 @@ def main():
     # If the action isn't a delete, require 'model-uri' and 'framework' only if an Isvc YAML
     # or custom model container spec are not provided.
     if action != 'delete':
-        if not inferenceservice_yaml and not custom_model_spec and not (model_uri and framework):
+        if not inferenceservice_yaml and not pred_custom_model_spec and not (pred_model_uri and pred_framework):
             parser.error('Arguments for {} and {} are required when performing "{}" action'.format(
-                'model_uri', 'framework', action
+                'pred_model_uri', 'pred_framework', action
             ))
 
     model_status = perform_action(
         action=action,
         model_name=model_name,
-        model_uri=model_uri,
-        canary_traffic_percent=canary_traffic_percent,
         namespace=namespace,
-        framework=framework,
-        runtime_version=runtime_version,
-        resource_requests=resource_requests,
-        resource_limits=resource_limits,
-        custom_model_spec=custom_model_spec,
         autoscaling_target=autoscaling_target,
-        service_account=service_account,
         enable_istio_sidecar=enable_istio_sidecar,
         inferenceservice_yaml=inferenceservice_yaml,
-        request_timeout=request_timeout,
         watch_timeout=watch_timeout,
-        min_replicas=min_replicas,
-        max_replicas=max_replicas
+        service_account=service_account,
+        pred_min_replicas=pred_min_replicas,
+        pred_max_replicas=pred_max_replicas,
+        pred_model_uri=pred_model_uri,
+        pred_canary_traffic_percent=pred_canary_traffic_percent,
+        pred_framework=pred_framework,
+        pred_runtime_version=pred_runtime_version,
+        pred_resource_requests=pred_resource_requests,
+        pred_resource_limits=pred_resource_limits,
+        pred_custom_model_spec=pred_custom_model_spec,
+        pred_request_timeout=pred_request_timeout,
+        transf_min_replicas=transf_min_replicas,
+        transf_max_replicas=transf_max_replicas,
+        transf_image=transf_image,
+        transf_args=transf_args,
+        transf_uri=transf_uri,
+        transf_resource_requests=transf_resource_requests,
+        transf_resource_limits=transf_resource_limits,
+        transf_request_timeout=transf_request_timeout
     )
 
     print(model_status)
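The two follow-up patches below repair comma slips in the args list introduced earlier. A cheap guard against this class of regression is to parse the file, since YAML rejects a malformed flow-style sequence; a sketch, path assumed:

import yaml

# yaml.safe_load raises a parser error on a malformed flow-style args list,
# e.g. a missing comma between entries.
with open("components/kserve/component.yaml") as f:
    yaml.safe_load(f)
print("component.yaml parses cleanly")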
From e2647f33370a961c875a2ad7bbcd0304f7b59117 Mon Sep 17 00:00:00 2001
From: Magdalena Kuhn
Date: Sun, 29 Oct 2023 16:31:14 +0100
Subject: [PATCH 24/25] fix comma in component args

---
 components/kserve/component.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/kserve/component.yaml b/components/kserve/component.yaml
index 04dd801d33c..68bc14d26f7 100644
--- a/components/kserve/component.yaml
+++ b/components/kserve/component.yaml
@@ -42,7 +42,7 @@ implementation:
       --namespace, {inputValue: Namespace},
       --autoscaling-target, {inputValue: Autoscaling Target},
       --enable-istio-sidecar, {inputValue: Enable Istio Sidecar},
-      --enable-isvc-status, {inputValue: Enable ISVC Status}
+      --enable-isvc-status, {inputValue: Enable ISVC Status},
       --inferenceservice-yaml, {inputValue: InferenceService YAML},
       --watch-timeout, {inputValue: Watch Timeout},
       --service-account, {inputValue: Service Account},

From 3e6812ca710aaa84474c6176397dd31faf5a3e12 Mon Sep 17 00:00:00 2001
From: Magdalena Kuhn
Date: Sun, 29 Oct 2023 16:45:06 +0100
Subject: [PATCH 25/25] fix more commas in component args

---
 components/kserve/component.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/components/kserve/component.yaml b/components/kserve/component.yaml
index 68bc14d26f7..d879521aed6 100644
--- a/components/kserve/component.yaml
+++ b/components/kserve/component.yaml
@@ -56,13 +56,13 @@ implementation:
       --pred-resource-limits, {inputValue: Pred Resource Limits},
       --pred-request-timeout, {inputValue: Pred Request Timeout},
       --pred-custom-model-spec, {inputValue: Pred Custom Model Spec},
-      --transf-min-replicas {inputValue: Transf Min Replicas},
-      --transf-max-replicas {inputValue: Transf Max Replicas},
-      --transf-uri, {inputValue: Transformer URI},
+      --transf-min-replicas, {inputValue: Transf Min Replicas},
+      --transf-max-replicas, {inputValue: Transf Max Replicas},
+      --transf-uri, {inputValue: Transf URI},
       --transf-image, {inputValue: Transf Image},
       --transf-args, {inputValue: Transf Args},
       --transf-resource-requests, {inputValue: Transf Resource Requests},
       --transf-resource-limits, {inputValue: Transf Resource Limits},
-      --transf-request-timeout, {inputValue: TransfRequest Timeout},
+      --transf-request-timeout, {inputValue: Transf Request Timeout},
       --output-path, {outputPath: InferenceService Status},
     ]