From 7d01104fecbebf5e018b3890aab3a8cf9dee9527 Mon Sep 17 00:00:00 2001 From: Alibi Zhenis <92104549+AlibiZhenis@users.noreply.github.com> Date: Wed, 31 May 2023 23:19:08 +0500 Subject: [PATCH] Rename APIs for model serving framework (#159) * renaming APIs for OS 2.7 Signed-off-by: Alibi Zhenis * updating tests Signed-off-by: Alibi Zhenis * linting Signed-off-by: Alibi Zhenis * adding new methods and marking old ones as deprecated Signed-off-by: Alibi Zhenis * fixing tests Signed-off-by: Alibi Zhenis * updating documentation Signed-off-by: Alibi Zhenis * partially fixing tests for codecov/patch Signed-off-by: Alibi Zhenis * adding comments Signed-off-by: Alibi Zhenis * change constant name Signed-off-by: Alibi Zhenis * linting Signed-off-by: Alibi Zhenis * fixing tests Signed-off-by: Alibi Zhenis --------- Signed-off-by: Alibi Zhenis --- .github/workflows/integration.yml | 2 +- .../reference/api/ml_commons_deploy_api.rst | 6 + .../reference/api/ml_commons_register_api.rst | 6 + .../ml_commons_register_pretrained_api.rst | 6 + .../api/ml_commons_undeploy_model_api.rst | 6 + .../api/ml_commons_upload_pretrained_api.rst | 2 +- docs/source/reference/mlcommons.rst | 46 +++- .../ml_commons/ml_common_utils.py | 2 +- .../ml_commons/ml_commons_client.py | 232 +++++++++++++++--- opensearch_py_ml/ml_commons/model_uploader.py | 18 +- requirements-dev.txt | 1 + requirements.txt | 3 +- tests/ml_commons/test_ml_commons_client.py | 190 +++++++++++++- 13 files changed, 454 insertions(+), 66 deletions(-) create mode 100644 docs/source/reference/api/ml_commons_deploy_api.rst create mode 100644 docs/source/reference/api/ml_commons_register_api.rst create mode 100644 docs/source/reference/api/ml_commons_register_pretrained_api.rst create mode 100644 docs/source/reference/api/ml_commons_undeploy_model_api.rst diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 9e7f3f32..607bd356 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -12,7 +12,7 @@ jobs: cluster: ["opensearch"] secured: ["true"] entry: - - { opensearch_version: 2.6.0 } + - { opensearch_version: 2.7.0 } steps: - name: Checkout diff --git a/docs/source/reference/api/ml_commons_deploy_api.rst b/docs/source/reference/api/ml_commons_deploy_api.rst new file mode 100644 index 00000000..a577b63a --- /dev/null +++ b/docs/source/reference/api/ml_commons_deploy_api.rst @@ -0,0 +1,6 @@ +Deploy Model +================== + +.. currentmodule:: opensearch_py_ml + +.. autofunction:: opensearch_py_ml.ml_commons.MLCommonClient.deploy_model diff --git a/docs/source/reference/api/ml_commons_register_api.rst b/docs/source/reference/api/ml_commons_register_api.rst new file mode 100644 index 00000000..29a69703 --- /dev/null +++ b/docs/source/reference/api/ml_commons_register_api.rst @@ -0,0 +1,6 @@ +Register Model +================== + +.. currentmodule:: opensearch_py_ml + +.. autofunction:: opensearch_py_ml.ml_commons.MLCommonClient.register_model diff --git a/docs/source/reference/api/ml_commons_register_pretrained_api.rst b/docs/source/reference/api/ml_commons_register_pretrained_api.rst new file mode 100644 index 00000000..2cf3fb05 --- /dev/null +++ b/docs/source/reference/api/ml_commons_register_pretrained_api.rst @@ -0,0 +1,6 @@ +Register Pretrained Model +============================== + +.. currentmodule:: opensearch_py_ml + +.. 
autofunction:: opensearch_py_ml.ml_commons.MLCommonClient.register_pretrained_model diff --git a/docs/source/reference/api/ml_commons_undeploy_model_api.rst b/docs/source/reference/api/ml_commons_undeploy_model_api.rst new file mode 100644 index 00000000..e1824c56 --- /dev/null +++ b/docs/source/reference/api/ml_commons_undeploy_model_api.rst @@ -0,0 +1,6 @@ +Undeploy Model +================== + +.. currentmodule:: opensearch_py_ml + +.. autofunction:: opensearch_py_ml.ml_commons.MLCommonClient.undeploy_model diff --git a/docs/source/reference/api/ml_commons_upload_pretrained_api.rst b/docs/source/reference/api/ml_commons_upload_pretrained_api.rst index 73cff858..8016ceb7 100644 --- a/docs/source/reference/api/ml_commons_upload_pretrained_api.rst +++ b/docs/source/reference/api/ml_commons_upload_pretrained_api.rst @@ -1,5 +1,5 @@ Upload Pretrained Model -================== +=========================== .. currentmodule:: opensearch_py_ml diff --git a/docs/source/reference/mlcommons.rst b/docs/source/reference/mlcommons.rst index 258dc853..7834d6cf 100644 --- a/docs/source/reference/mlcommons.rst +++ b/docs/source/reference/mlcommons.rst @@ -6,63 +6,91 @@ MLCommons Support .. currentmodule:: opensearch_py_ml Upload Model -~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. toctree:: :maxdepth: 2 api/ml_commons_upload_api +Register Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. toctree:: + :maxdepth: 2 + + api/ml_commons_register_api + Upload Pretrained Model -~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. toctree:: :maxdepth: 2 api/ml_commons_upload_pretrained_api +Register Pretrained Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. toctree:: + :maxdepth: 2 + + api/ml_commons_register_pretrained_api + Load Model -~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. toctree:: :maxdepth: 2 api/ml_commons_load_api +Deploy Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. toctree:: + :maxdepth: 2 + + api/ml_commons_deploy_api + Get Task Info -~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. toctree:: :maxdepth: 2 api/ml_commons_get_task_info_api Get Model Info -~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. toctree:: :maxdepth: 2 api/ml_commons_get_model_info_api Generate Embedding -~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. toctree:: :maxdepth: 2 api/ml_commons_generate_embedding_api Unload Model -~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. toctree:: :maxdepth: 2 api/ml_commons_unload_model_api +Undeploy Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. toctree:: + :maxdepth: 2 + + api/ml_commons_undeploy_model_api + Delete Model -~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. toctree:: :maxdepth: 2 api/ml_commons_delete_model_api Delete Task -~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. toctree:: :maxdepth: 2 diff --git a/opensearch_py_ml/ml_commons/ml_common_utils.py b/opensearch_py_ml/ml_commons/ml_common_utils.py index 921d0c5d..9171230c 100644 --- a/opensearch_py_ml/ml_commons/ml_common_utils.py +++ b/opensearch_py_ml/ml_commons/ml_common_utils.py @@ -6,7 +6,7 @@ # GitHub history for details. ML_BASE_URI = "/_plugins/_ml" -MODEL_UPLOAD_CHUNK_SIZE = 10_000_000 +MODEL_CHUNK_MAX_SIZE = 10_000_000 MODEL_MAX_SIZE = 4_000_000_000 BUF_SIZE = 65536 # lets read stuff in 64kb chunks! 
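# A minimal sketch (assuming a hypothetical 25 MB model zip) of how the
# renamed MODEL_CHUNK_MAX_SIZE constant is used by the uploader, which splits
# the model file into ceil(file_size / MODEL_CHUNK_MAX_SIZE) chunks:
from math import ceil

MODEL_CHUNK_MAX_SIZE = 10_000_000  # same value as the constant above
model_size_bytes = 25_000_000  # hypothetical model zip size
total_num_chunks = ceil(model_size_bytes / MODEL_CHUNK_MAX_SIZE)
assert total_num_chunks == 3  # two full 10 MB chunks plus a 5 MB remainder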
TIMEOUT = 120 # timeout for synchronous method calls in seconds diff --git a/opensearch_py_ml/ml_commons/ml_commons_client.py b/opensearch_py_ml/ml_commons/ml_commons_client.py index 850f21d6..0c3cb83a 100644 --- a/opensearch_py_ml/ml_commons/ml_commons_client.py +++ b/opensearch_py_ml/ml_commons/ml_commons_client.py @@ -9,6 +9,7 @@ import time from typing import Any, List, Union +from deprecated.sphinx import deprecated from opensearchpy import OpenSearch from opensearch_py_ml.ml_commons.ml_common_utils import ML_BASE_URI, TIMEOUT @@ -17,7 +18,7 @@ class MLCommonClient: """ - A client that communicates to the ml-common plugin for OpenSearch. This client allows for uploading of trained + A client that communicates to the ml-common plugin for OpenSearch. This client allows for registration of trained machine learning models to an OpenSearch index. """ @@ -25,6 +26,10 @@ def __init__(self, os_client: OpenSearch): self._client = os_client self._model_uploader = ModelUploader(os_client) + @deprecated( + reason="Since OpenSearch 2.7.0, you can use register_model instead", + version="2.7.0", + ) def upload_model( self, model_path: str, @@ -34,8 +39,8 @@ def upload_model( wait_until_loaded: bool = True, ) -> str: """ - This method uploads model into opensearch cluster using ml-common plugin's api. - first this method creates a model id to store model metadata and then breaks the model zip file into + This method registers the model in the opensearch cluster using ml-common plugin's api. + First, this method creates a model id to store model metadata and then breaks the model zip file into multiple chunks and then upload chunks into opensearch cluster :param model_path: path of the zip file of the model @@ -58,14 +63,14 @@ def upload_model( :type model_config_path: string :param isVerbose: if isVerbose is true method will print more messages. default False :type isVerbose: boolean - :param load_model: Whether to load the model in memory using uploaded model chunks + :param load_model: Whether to deploy the model using uploaded model chunks :type load_model: bool - :param wait_until_loaded: If load_model is true, whether to wait until the model is loaded into memory + :param wait_until_loaded: If load_model is true, whether to wait until the model is deployed :type wait_until_loaded: bool :return: returns the model_id so that we can use this for further operation. :rtype: string """ - model_id = self._model_uploader._upload_model( + model_id = self._model_uploader._register_model( model_path, model_config_path, isVerbose ) @@ -75,6 +80,60 @@ def upload_model( return model_id + def register_model( + self, + model_path: str, + model_config_path: str, + isVerbose: bool = False, + deploy_model: bool = True, + wait_until_deployed: bool = True, + ) -> str: + """ + This method registers the model in the opensearch cluster using ml-common plugin's api. + First, this method creates a model id to store model metadata and then breaks the model zip file into + multiple chunks and then upload chunks into opensearch cluster + + :param model_path: path of the zip file of the model + :type model_path: string + :param model_config_path: filepath of the model metadata. 
A json file of model metadata is expected + Model metadata format example: + { + "name": "all-MiniLM-L6-v2", + "version": 1, + "model_format": "TORCH_SCRIPT", + "model_config": { + "model_type": "bert", + "embedding_dimension": 384, + "framework_type": "sentence_transformers", + }, + } + + refer to: + https://opensearch.org/docs/latest/ml-commons-plugin/model-serving-framework/#upload-model-to-opensearch + :type model_config_path: string + :param isVerbose: if isVerbose is true method will print more messages. default False + :type isVerbose: boolean + :param deploy_model: Whether to deploy the model using uploaded model chunks + :type deploy_model: bool + :param wait_until_deployed: If deploy_model is true, whether to wait until the model is deployed + :type wait_until_deployed: bool + :return: returns the model_id so that we can use this for further operation. + :rtype: string + """ + model_id = self._model_uploader._register_model( + model_path, model_config_path, isVerbose + ) + + # loading the model chunks from model index + if deploy_model: + self.deploy_model(model_id, wait_until_deployed=wait_until_deployed) + + return model_id + + @deprecated( + reason="Since OpenSearch 2.7.0, you can use register_pretrained_model instead", + version="2.7.0", + ) def upload_pretrained_model( self, model_name: str, @@ -84,9 +143,8 @@ def upload_pretrained_model( wait_until_loaded: bool = True, ): """ - This method uploads a pretrained model into opensearch cluster using ml-common plugin's api. - First, this method creates a model id to store model info - and then loads the model in memory if load_model is True. + This method registers the pretrained model in the opensearch cluster using ml-common plugin's api. + First, this method creates a model id to store model info and then deploys the model if load_model is True. The model has to be supported by ML Commons. Refer to https://opensearch.org/docs/latest/ml-commons-plugin/pretrained-models/. :param model_name: Name of the pretrained model @@ -95,9 +153,9 @@ def upload_pretrained_model( :type model_version: string :param model_format: "TORCH_SCRIPT" or "ONNX" :type model_format: string - :param load_model: Whether to load the model in memory using uploaded model chunks + :param load_model: Whether to deploy the model using uploaded model chunks :type load_model: bool - :param wait_until_loaded: If load_model is true, whether to wait until the model is loaded into memory + :param wait_until_loaded: If load_model is true, whether to wait until the model is deployed :type wait_until_loaded: bool :return: returns the model_id so that we can use this for further operation :rtype: string @@ -116,9 +174,49 @@ def upload_pretrained_model( return model_id + def register_pretrained_model( + self, + model_name: str, + model_version: str, + model_format: str, + deploy_model: bool = True, + wait_until_deployed: bool = True, + ): + """ + This method registers the pretrained model in the opensearch cluster using ml-common plugin's api. + First, this method creates a model id to store model info and then deploys the model if deploy_model is True. + The model has to be supported by ML Commons. Refer to https://opensearch.org/docs/latest/ml-commons-plugin/pretrained-models/. 
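# A minimal usage sketch for the new register_model API, assuming a local
# cluster with the ML Commons plugin enabled; the host settings and file
# paths below are hypothetical.
from opensearchpy import OpenSearch

from opensearch_py_ml.ml_commons import MLCommonClient

os_client = OpenSearch(hosts=[{"host": "localhost", "port": 9200}])
ml_client = MLCommonClient(os_client)

model_id = ml_client.register_model(
    model_path="/path/to/all-MiniLM-L6-v2.zip",  # hypothetical path
    model_config_path="/path/to/model_config.json",  # hypothetical path
    deploy_model=True,  # register, then deploy in one call
    wait_until_deployed=True,  # block until the model state is DEPLOYED
)
print("Registered model id:", model_id)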
+ + :param model_name: Name of the pretrained model + :type model_name: string + :param model_version: Version of the pretrained model + :type model_version: string + :param model_format: "TORCH_SCRIPT" or "ONNX" + :type model_format: string + :param deploy_model: Whether to deploy the model using uploaded model chunks + :type deploy_model: bool + :param wait_until_deployed: If deploy_model is true, whether to wait until the model is deployed + :type wait_until_deployed: bool + :return: returns the model_id so that we can use this for further operation + :rtype: string + """ + # creating model meta doc + model_config_json = { + "name": model_name, + "version": model_version, + "model_format": model_format, + } + model_id = self._send_model_info(model_config_json) + + # loading the model chunks from model index + if deploy_model: + self.deploy_model(model_id, wait_until_deployed=wait_until_deployed) + + return model_id + def _send_model_info(self, model_meta_json: dict): """ - This method sends the pretrained model info to ML Commons' upload api + This method sends the pretrained model info to ML Commons' register api :param model_meta_json: a dictionary object with model configurations :type model_meta_json: dict @@ -127,7 +225,7 @@ def _send_model_info(self, model_meta_json: dict): """ output: Union[bool, Any] = self._client.transport.perform_request( method="POST", - url=f"{ML_BASE_URI}/models/_upload", + url=f"{ML_BASE_URI}/models/_register", body=model_meta_json, ) end = time.time() + TIMEOUT # timeout seconds @@ -137,54 +235,99 @@ def _send_model_info(self, model_meta_json: dict): status = self._get_task_info(output["task_id"]) if status["state"] != "CREATED": task_flag = True + # TODO: need to add the test case later for this line if not task_flag: - raise TimeoutError("Uploading model timed out") + raise TimeoutError("Model registration timed out") if status["state"] == "FAILED": raise Exception(status["error"]) - print("Model was uploaded successfully. Model Id: ", status["model_id"]) + print("Model was registered successfully. Model Id: ", status["model_id"]) return status["model_id"] + @deprecated( + reason="Since OpenSearch 2.7.0, you can use deploy_model instead", + version="2.7.0", + ) def load_model(self, model_id: str, wait_until_loaded: bool = True) -> object: """ - This method loads model into opensearch cluster using ml-common plugin's load model api + This method deploys a model in the opensearch cluster using ml-common plugin's deploy model api :param model_id: unique id of the model :type model_id: string - :param wait_until_loaded: Whether to wait until the model is loaded into memory + :param wait_until_loaded: Whether to wait until the model is deployed :type wait_until_loaded: bool :return: returns a json object, with task_id and status key. 
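# A usage sketch for register_pretrained_model, reusing the ml_client from the
# sketch above; the model name, version, and format are illustrative and must
# match one of the ML-Commons-supported pretrained models.
model_id = ml_client.register_pretrained_model(
    model_name="huggingface/sentence-transformers/all-MiniLM-L12-v2",
    model_version="1.0.1",
    model_format="TORCH_SCRIPT",
    deploy_model=True,
    wait_until_deployed=True,
)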
:rtype: object
         """

-        API_URL = f"{ML_BASE_URI}/models/{model_id}/_load"
+        API_URL = f"{ML_BASE_URI}/models/{model_id}/_deploy"

         task_id = self._client.transport.perform_request(method="POST", url=API_URL)[
             "task_id"
         ]

         if wait_until_loaded:
-            # Wait until loaded
+            # Wait until deployed
             for i in range(TIMEOUT):
                 ml_model_status = self.get_model_info(model_id)
                 model_state = ml_model_status.get("model_state")
-                if model_state in ["LOADED", "PARTIALLY_LOADED"]:
+                if model_state in ["DEPLOYED", "PARTIALLY_DEPLOYED"]:
                     break
                 time.sleep(1)

+            # TODO: need to add the test case later for this line
             # Check the model status
-            if model_state == "LOADED":
-                print("Model loaded into memory successfully")
-            elif model_state == "PARTIALLY_LOADED":
-                print("Model was loaded into memory only partially")
+            if model_state == "DEPLOYED":
+                print("Model deployed successfully")
+            elif model_state == "PARTIALLY_DEPLOYED":
+                print("Model deployed only partially")
+            # TODO: need to add the test case later for this line
             else:
-                raise Exception("Model load failed")
+                raise Exception("Model deployment failed")
+
+        return self._get_task_info(task_id)
+
+    def deploy_model(self, model_id: str, wait_until_deployed: bool = True) -> object:
+        """
+        This method deploys a model in the opensearch cluster using ml-common plugin's deploy model api
+
+        :param model_id: unique id of the model
+        :type model_id: string
+        :param wait_until_deployed: Whether to wait until the model is deployed
+        :type wait_until_deployed: bool
+        :return: returns a json object with task_id and status keys.
+        :rtype: object
+        """
+
+        API_URL = f"{ML_BASE_URI}/models/{model_id}/_deploy"
+
+        task_id = self._client.transport.perform_request(method="POST", url=API_URL)[
+            "task_id"
+        ]
+
+        if wait_until_deployed:
+            # Wait until deployed
+            for i in range(TIMEOUT):
+                ml_model_status = self.get_model_info(model_id)
+                model_state = ml_model_status.get("model_state")
+                if model_state in ["DEPLOYED", "PARTIALLY_DEPLOYED"]:
+                    break
+                time.sleep(1)
+
+            # TODO: need to add the test case later for this line
+            # Check the model status
+            if model_state == "DEPLOYED":
+                print("Model deployed successfully")
+            elif model_state == "PARTIALLY_DEPLOYED":
+                print("Model deployed only partially")
+            else:
+                raise Exception("Model deployment failed")

         return self._get_task_info(task_id)

     def get_task_info(self, task_id: str, wait_until_task_done: bool = False) -> object:
         """
         This method return information about a task running into opensearch cluster (using ml commons api)
-        when we load a model
+        when we deploy a model

         :param task_id: unique id of the task
         :type task_id: string
@@ -218,7 +361,7 @@ def _get_task_info(self, task_id: str):

     def get_model_info(self, model_id: str) -> object:
         """
-        This method return information about a model uploaded into opensearch cluster (using ml commons api)
+        This method returns information about a model registered in the opensearch cluster (using ml commons api)

         :param model_id: unique id of the model
         :type model_id: string
@@ -256,19 +399,48 @@ def generate_embedding(self, model_id: str, sentences: List[str]) -> object:
             body=API_BODY,
         )

+    @deprecated(
+        reason="Since OpenSearch 2.7.0, you can use undeploy_model instead",
+        version="2.7.0",
+    )
     def unload_model(self, model_id: str, node_ids: List[str] = []) -> object:
         """
-        This method unloads a model from all the nodes or from the given list of nodes (using ml commons _unload api)
+        This method undeploys a model from all the nodes or from the given list of nodes (using ml commons _undeploy api)
+
+
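# Sketch: deploying an already-registered model and checking the result;
# model_id is assumed to come from one of the register calls above.
task_info = ml_client.deploy_model(model_id, wait_until_deployed=True)
print("Deploy task state:", task_info.get("state"))

ml_model_status = ml_client.get_model_info(model_id)
# Expect "DEPLOYED" (or "PARTIALLY_DEPLOYED" on a multi-node cluster)
print("Model state:", ml_model_status.get("model_state"))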
:param model_id: unique id of the nlp model
+        :type model_id: string
+        :param node_ids: List of nodes
+        :type node_ids: list of string
+        :return: returns a json object defining from which nodes the model was undeployed.
+        :rtype: object
+        """
+
+        API_URL = f"{ML_BASE_URI}/models/{model_id}/_undeploy"
+
+        # TODO: need to add the test case later for this line
+        API_BODY = {}
+        if len(node_ids) > 0:
+            API_BODY["node_ids"] = node_ids
+
+        return self._client.transport.perform_request(
+            method="POST",
+            url=API_URL,
+            body=API_BODY,
+        )
+
+    def undeploy_model(self, model_id: str, node_ids: List[str] = []) -> object:
+        """
+        This method undeploys a model from all the nodes or from the given list of nodes (using ml commons _undeploy api)

         :param model_id: unique id of the nlp model
         :type model_id: string
         :param node_ids: List of nodes
         :type node_ids: list of string
-        :return: returns a json object with defining from which nodes the model has unloaded.
+        :return: returns a json object defining from which nodes the model was undeployed.
         :rtype: object
         """

-        API_URL = f"{ML_BASE_URI}/models/{model_id}/_unload"
+        API_URL = f"{ML_BASE_URI}/models/{model_id}/_undeploy"

         API_BODY = {}
         if len(node_ids) > 0:
diff --git a/opensearch_py_ml/ml_commons/model_uploader.py b/opensearch_py_ml/ml_commons/model_uploader.py
index 03c74841..72e5454c 100644
--- a/opensearch_py_ml/ml_commons/model_uploader.py
+++ b/opensearch_py_ml/ml_commons/model_uploader.py
@@ -16,14 +16,14 @@
 from opensearch_py_ml.ml_commons.ml_common_utils import (
     BUF_SIZE,
     ML_BASE_URI,
+    MODEL_CHUNK_MAX_SIZE,
     MODEL_MAX_SIZE,
-    MODEL_UPLOAD_CHUNK_SIZE,
 )


 class ModelUploader:
     """
-    Class for uploading model using ml-commons apis in opensearch cluster.
+    Class for registering a model using ml-commons apis in opensearch cluster.
     """

     META_API_ENDPOINT = "models/meta"
@@ -40,12 +40,12 @@ class ModelUploader:
     def __init__(self, os_client: OpenSearch):
         self._client = os_client

-    def _upload_model(
+    def _register_model(
         self, model_path: str, model_meta_path: str, isVerbose: bool
     ) -> str:
         """
-        This method uploads model into opensearch cluster using ml-common plugin's api.
-        first this method creates a model id to store model metadata and then breaks the model zip file into
+        This method registers the model in the opensearch cluster using ml-common plugin's register model api.
+        First, this method creates a model id to store model metadata and then breaks the model zip file into
         multiple chunks and then upload chunks into cluster.
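# Sketch: undeploying a model, either cluster-wide or from specific nodes;
# the node ids below are hypothetical.
ml_client.undeploy_model(model_id)  # undeploy from every node
ml_client.undeploy_model(model_id, node_ids=["node-id-1", "node-id-2"])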
:param model_path: path of the zip file of the model @@ -75,9 +75,7 @@ def _upload_model( if os.stat(model_path).st_size > MODEL_MAX_SIZE: raise Exception("Model file size exceeds the limit of 4GB") - total_num_chunks: int = ceil( - os.stat(model_path).st_size / MODEL_UPLOAD_CHUNK_SIZE - ) + total_num_chunks: int = ceil(os.stat(model_path).st_size / MODEL_CHUNK_MAX_SIZE) # we are generating the sha1 hash for the model zip file hash_val_model_file = self._generate_hash(model_path) @@ -112,7 +110,7 @@ def _upload_model( def model_file_chunk_generator() -> Iterable[str]: with open(model_path, "rb") as f: while True: - data = f.read(MODEL_UPLOAD_CHUNK_SIZE) + data = f.read(MODEL_CHUNK_MAX_SIZE) if not data: break yield data # type: ignore # check if we actually need to do base64 encoding @@ -130,7 +128,7 @@ def model_file_chunk_generator() -> Iterable[str]: if isVerbose: print("Model id:", output) - print("Model uploaded successfully") + print("Model registered successfully") return model_id else: raise Exception( diff --git a/requirements-dev.txt b/requirements-dev.txt index 76e8a6e0..c2b91d82 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -10,6 +10,7 @@ accelerate sentence_transformers tqdm transformers +deprecated # # Testing diff --git a/requirements.txt b/requirements.txt index ec31969d..817ffa1f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,5 @@ torch==1.13.1 accelerate sentence_transformers tqdm -transformers \ No newline at end of file +transformers +deprecated diff --git a/tests/ml_commons/test_ml_commons_client.py b/tests/ml_commons/test_ml_commons_client.py index 431a72d0..96687d9b 100644 --- a/tests/ml_commons/test_ml_commons_client.py +++ b/tests/ml_commons/test_ml_commons_client.py @@ -28,7 +28,6 @@ os.path.dirname(os.path.abspath("__file__")), "tests", "sample_zip" ) - MODEL_FILE_ZIP_NAME = "test_model.zip" MODEL_FILE_PT_NAME = "test_model.pt" MODEL_CONFIG_FILE_NAME = "ml-commons_model_config.json" @@ -73,7 +72,7 @@ def test_init(): assert type(ml_client._model_uploader) == ModelUploader -def test_integration_pretrained_model_upload_unload_delete(): +def test_DEPRECATED_integration_pretrained_model_upload_unload_delete(): raised = False try: model_id = ml_client.upload_pretrained_model( @@ -84,10 +83,12 @@ def test_integration_pretrained_model_upload_unload_delete(): wait_until_loaded=True, ) ml_model_status = ml_client.get_model_info(model_id) - assert ml_model_status.get("model_state") != "LOAD_FAILED" + assert ml_model_status.get("model_state") != "DEPLOY_FAILED" except: # noqa: E722 raised = True - assert raised == False, "Raised Exception during pretrained model upload and load" + assert ( + raised == False + ), "Raised Exception during pretrained model registration and deployment" if model_id: raised = False @@ -103,10 +104,10 @@ def test_integration_pretrained_model_upload_unload_delete(): try: ml_client.unload_model(model_id) ml_model_status = ml_client.get_model_info(model_id) - assert ml_model_status.get("model_state") == "UNLOADED" + assert ml_model_status.get("model_state") != "UNDEPLOY_FAILED" except: # noqa: E722 raised = True - assert raised == False, "Raised Exception in unloading pretrained model" + assert raised == False, "Raised Exception in pretrained model undeployment" raised = False try: @@ -117,13 +118,60 @@ def test_integration_pretrained_model_upload_unload_delete(): assert raised == False, "Raised Exception in deleting pretrained model" -def test_integration_model_train_upload_full_cycle(): +def 
test_integration_pretrained_model_register_undeploy_delete(): + raised = False + try: + model_id = ml_client.register_pretrained_model( + model_name=PRETRAINED_MODEL_NAME, + model_version=PRETRAINED_MODEL_VERSION, + model_format=PRETRAINED_MODEL_FORMAT, + deploy_model=True, + wait_until_deployed=True, + ) + ml_model_status = ml_client.get_model_info(model_id) + assert ml_model_status.get("model_state") != "DEPLOY_FAILED" + except: # noqa: E722 + raised = True + assert ( + raised == False + ), "Raised Exception during pretrained model registration and deployment" + + if model_id: + raised = False + try: + ml_model_status = ml_client.get_model_info(model_id) + assert ml_model_status.get("model_format") == "TORCH_SCRIPT" + assert ml_model_status.get("algorithm") == "TEXT_EMBEDDING" + except: # noqa: E722 + raised = True + assert raised == False, "Raised Exception in getting pretrained model info" + + raised = False + try: + ml_client.undeploy_model(model_id) + ml_model_status = ml_client.get_model_info(model_id) + assert ml_model_status.get("model_state") != "UNDEPLOY_FAILED" + except: # noqa: E722 + raised = True + assert raised == False, "Raised Exception in pretrained model undeployment" + + raised = False + try: + delete_model_obj = ml_client.delete_model(model_id) + assert delete_model_obj.get("result") == "deleted" + except: # noqa: E722 + raised = True + assert raised == False, "Raised Exception in deleting pretrained model" + + +def test_DEPRECATED_integration_model_train_upload_full_cycle(): # first training the model with small epoch test_model.train( read_path=TESTDATA_SYNTHETIC_QUERY_ZIP, output_model_name=MODEL_FILE_PT_NAME, zip_file_name=MODEL_FILE_ZIP_NAME, num_epochs=1, + overwrite=True, ) # second generating the config file to create metadoc of the model in opensearch. 
test_model.make_model_config_json() @@ -142,7 +190,7 @@ def test_integration_model_train_upload_full_cycle(): print("Model_id:", model_id) except: # noqa: E722 raised = True - assert raised == False, "Raised Exception during model upload" + assert raised == False, "Raised Exception during model registration" if model_id: raised = False @@ -152,10 +200,10 @@ def test_integration_model_train_upload_full_cycle(): assert task_id != "" or task_id is not None ml_model_status = ml_client.get_model_info(model_id) - assert ml_model_status.get("model_state") != "LOAD_FAILED" + assert ml_model_status.get("model_state") != "DEPLOY_FAILED" except: # noqa: E722 raised = True - assert raised == False, "Raised Exception in loading model" + assert raised == False, "Raised Exception in model deployment" raised = False try: @@ -173,7 +221,7 @@ def test_integration_model_train_upload_full_cycle(): ml_task_status = ml_client.get_task_info( task_id, wait_until_task_done=True ) - assert ml_task_status.get("task_type") == "LOAD_MODEL" + assert ml_task_status.get("task_type") == "DEPLOY_MODEL" print("State:", ml_task_status.get("state")) assert ml_task_status.get("state") != "FAILED" except: # noqa: E722 @@ -205,10 +253,126 @@ def test_integration_model_train_upload_full_cycle(): try: ml_client.unload_model(model_id) ml_model_status = ml_client.get_model_info(model_id) - assert ml_model_status.get("model_state") == "UNLOADED" + assert ml_model_status.get("model_state") != "UNDEPLOY_FAILED" + except: # noqa: E722 + raised = True + assert raised == False, "Raised Exception in model undeployment" + + raised = False + try: + delete_model_obj = ml_client.delete_model(model_id) + assert delete_model_obj.get("result") == "deleted" + except: # noqa: E722 + raised = True + assert raised == False, "Raised Exception in deleting model" + + +def test_integration_model_train_register_full_cycle(): + # first training the model with small epoch + test_model.train( + read_path=TESTDATA_SYNTHETIC_QUERY_ZIP, + output_model_name=MODEL_FILE_PT_NAME, + zip_file_name=MODEL_FILE_ZIP_NAME, + num_epochs=1, + overwrite=True, + ) + # second generating the config file to create metadoc of the model in opensearch. 
+        test_model.make_model_config_json()
+    model_file_exists = exists(MODEL_PATH)
+    model_config_file_exists = exists(MODEL_CONFIG_FILE_PATH)
+    assert model_file_exists == True
+    assert model_config_file_exists == True
+    if model_file_exists and model_config_file_exists:
+        model_id = ""
+        task_id = ""
+
+        # Testing deploy_model = True for codecov/patch
+        raised = False
+        try:
+            ml_client.register_model(
+                MODEL_PATH, MODEL_CONFIG_FILE_PATH, deploy_model=True, isVerbose=True
+            )
+        except:  # noqa: E722
+            raised = True
+        assert raised == False, "Raised Exception during first model registration"
+
+        raised = False
+        try:
+            model_id = ml_client.register_model(
+                MODEL_PATH, MODEL_CONFIG_FILE_PATH, deploy_model=False, isVerbose=True
+            )
+            print("Model_id:", model_id)
+        except:  # noqa: E722
+            raised = True
+        assert raised == False, "Raised Exception during second model registration"
+
+        if model_id:
+            raised = False
+            try:
+                ml_deploy_status = ml_client.deploy_model(
+                    model_id, wait_until_deployed=False
+                )
+                task_id = ml_deploy_status.get("task_id")
+                assert task_id != "" and task_id is not None
+
+                ml_model_status = ml_client.get_model_info(model_id)
+                assert ml_model_status.get("model_state") != "DEPLOY_FAILED"
+            except:  # noqa: E722
+                raised = True
+            assert raised == False, "Raised Exception in model deployment"
+
+            raised = False
+            try:
+                ml_model_status = ml_client.get_model_info(model_id)
+                assert ml_model_status.get("model_format") == "TORCH_SCRIPT"
+                assert ml_model_status.get("algorithm") == "TEXT_EMBEDDING"
+            except:  # noqa: E722
+                raised = True
+            assert raised == False, "Raised Exception in getting model info"
+
+            if task_id:
+                raised = False
+                ml_task_status = None
+                try:
+                    ml_task_status = ml_client.get_task_info(
+                        task_id, wait_until_task_done=True
+                    )
+                    assert ml_task_status.get("task_type") == "DEPLOY_MODEL"
+                    print("State:", ml_task_status.get("state"))
+                    assert ml_task_status.get("state") != "FAILED"
+                except:  # noqa: E722
+                    print("Model Task Status:", ml_task_status)
+                    raised = True
+                assert raised == False, "Raised Exception in pulling task info"
+            # This test is flaky. Sometimes it passes and sometimes it shows a 500 error
+            # due to the memory circuit breaker.
+            # Todo: We need to revisit this test.
+            try:
+                raised = False
+                sentences = ["First test sentence", "Second test sentence"]
+                embedding_result = ml_client.generate_embedding(model_id, sentences)
+                print(embedding_result)
+                assert len(embedding_result.get("inference_results")) == 2
+            except:  # noqa: E722
+                raised = True
+            assert (
+                raised == False
+            ), "Raised Exception in generating sentence embedding"
+
+            try:
+                delete_task_obj = ml_client.delete_task(task_id)
+                assert delete_task_obj.get("result") == "deleted"
+            except:  # noqa: E722
+                raised = True
+            assert raised == False, "Raised Exception in deleting task"
+
+            try:
+                ml_client.undeploy_model(model_id)
+                ml_model_status = ml_client.get_model_info(model_id)
+                assert ml_model_status.get("model_state") != "UNDEPLOY_FAILED"
         except:  # noqa: E722
             raised = True
-            assert raised == False, "Raised Exception in unloading model"
+            assert raised == False, "Raised Exception in model undeployment"

         raised = False
         try:
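Taken together, the renamed methods cover the full model lifecycle. A minimal end-to-end sketch with the 2.7 names, assuming the ml_client and model_id from the sketches above:

sentences = ["First test sentence", "Second test sentence"]
embedding_result = ml_client.generate_embedding(model_id, sentences)
print(len(embedding_result.get("inference_results")))  # one entry per sentence

ml_client.undeploy_model(model_id)  # release the model from the nodes
ml_client.delete_model(model_id)  # then remove it from the model index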