rhoai-4386 model serving and ui updates for 2.8 (#25)

* rhoai-4386 model serving and ui updates for 2.8 * rhoai-4386 more updates based on downstream testing * rhoai-4386 addressed review comments * peer review comments incorporated * 4386 update pipeline screen captures * 4386 add note about Elyra * rhoai-4386 sme review updates * updates to include multi-model notebooks and update screen captures * fix typo and upload pipeline file
rh-aiservices-bu · Apr 4, 2024 · b9b5daf · b9b5daf
1 parent 1c261be
commit b9b5daf
Show file tree

Hide file tree

Showing 58 changed files with 347 additions and 250 deletions.
diff --git a/1_experiment_train.ipynb b/1_experiment_train.ipynb
@@ -2,7 +2,9 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "source": [
     "# Experiment"
    ]
@@ -155,7 +157,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Train the model"
+    "## Train the model\n",
+    "\n",
+    "Training a model is often the most time-consuming part of the machine learning process.  Large models can take multiple GPUs for days.  Expect the training on CPU for this very simple model to take a minute or more."
    ]
   },
   {
@@ -189,8 +193,8 @@
    "source": [
     "# Save the model as ONNX for easy use of ModelMesh\n",
     "model_proto, _ = tf2onnx.convert.from_keras(model)\n",
-    "os.makedirs(\"models/fraud\", exist_ok=True)\n",
-    "onnx.save(model_proto, \"models/fraud/model.onnx\")"
+    "os.makedirs(\"models/fraud/1\", exist_ok=True)\n",
+    "onnx.save(model_proto, \"models/fraud/1/model.onnx\")"
    ]
   },
   {
@@ -211,50 +215,6 @@
     "! ls -alRh ./models/"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Create a date-stamped folder"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Create a date-stamped folder for fraud models\n",
-    "current_date = datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n",
-    "fraud_folder = os.path.join(\"models/\", current_date + \"-fraud\")\n",
-    "os.makedirs(fraud_folder, exist_ok=True)\n",
-    "\n",
-    "# Save the model to the date-stamped folder\n",
-    "model_path = os.path.join(fraud_folder, \"model.onnx\")\n",
-    "onnx.save(model_proto, model_path)\n",
-    "\n",
-    "print(f\"Saved the model to {model_path}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "Confirm that the model file was created successfully. \n",
-    "The output should include the model file name, size, and date. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "! ls -alh ./models/"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -308,7 +268,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "sess = rt.InferenceSession(\"models/fraud/model.onnx\", providers=rt.get_available_providers())\n",
+    "sess = rt.InferenceSession(\"models/fraud/1/model.onnx\", providers=rt.get_available_providers())\n",
     "input_name = sess.get_inputs()[0].name\n",
     "output_name = sess.get_outputs()[0].name\n",
     "y_pred_temp = sess.run([output_name], {input_name: scaler.transform(X_test.values).astype(np.float32)}) \n",

diff --git a/2_save_model.ipynb b/2_save_model.ipynb
@@ -30,7 +30,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "import os\n",
@@ -43,6 +45,10 @@
     "region_name = os.environ.get('AWS_DEFAULT_REGION')\n",
     "bucket_name = os.environ.get('AWS_S3_BUCKET')\n",
     "\n",
+    "if not all([aws_access_key_id, aws_secret_access_key, endpoint_url, region_name, bucket_name]):\n",
+    "    raise ValueError(\"One or more data connection variables are empty.  \"\n",
+    "                     \"Please check your data connection to an S3 bucket.\")\n",
+    "\n",
     "session = boto3.session.Session(aws_access_key_id=aws_access_key_id,\n",
     "                                aws_secret_access_key=aws_secret_access_key)\n",
     "\n",
@@ -56,13 +62,17 @@
     "\n",
     "\n",
     "def upload_directory_to_s3(local_directory, s3_prefix):\n",
+    "    num_files = 0\n",
     "    for root, dirs, files in os.walk(local_directory):\n",
     "        for filename in files:\n",
     "            file_path = os.path.join(root, filename)\n",
     "            relative_path = os.path.relpath(file_path, local_directory)\n",
     "            s3_key = os.path.join(s3_prefix, relative_path)\n",
     "            print(f\"{file_path} -> {s3_key}\")\n",
     "            bucket.upload_file(file_path, s3_key)\n",
+    "            num_files += 1\n",
+    "    return num_files\n",
+    "\n",
     "\n",
     "def list_objects(prefix):\n",
     "    filter = bucket.objects.filter(Prefix=prefix)\n",
@@ -82,7 +92,7 @@
     "\n",
     "If this is the first time running the code, this cell will have no output.\n",
     "\n",
-    "If you've already uploaded your model, you should see this output: `models/fraud/model.onnx`\n"
+    "If you've already uploaded your model, you should see this output: `models/fraud/1/model.onnx`\n"
    ]
   },
   {
@@ -116,7 +126,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "upload_directory_to_s3(\"models\", \"models\")"
+    "local_models_directory = \"models\"\n",
+    "\n",
+    "if not os.path.isdir(local_models_directory):\n",
+    "    raise ValueError(f\"The directory '{local_models_directory}' does not exist.  \"\n",
+    "                     \"Did you finish training the model in the previous notebook?\")\n",
+    "\n",
+    "num_files = upload_directory_to_s3(\"models\", \"models\")\n",
+    "\n",
+    "if num_files == 0:\n",
+    "    raise ValueError(\"No files uploaded.  Did you finish training and \"\n",
+    "                     \"saving the model to the \\\"models\\\" directory?  \"\n",
+    "                     \"Check for \\\"models/fraud/1/model.onnx\\\"\")\n"
    ]
   },
   {

diff --git a/3_deploy_model.ipynb b/3_deploy_model.ipynb
diff --git a/4_rest_requests.ipynb → 3_rest_requests_multi_model.ipynb b/4_rest_requests.ipynb → 3_rest_requests_multi_model.ipynb
diff --git a/5_grpc_requests.ipynb → 4_grpc_requests_multi_model.ipynb b/5_grpc_requests.ipynb → 4_grpc_requests_multi_model.ipynb
diff --git a/5_rest_requests_single_model.ipynb b/5_rest_requests_single_model.ipynb
@@ -0,0 +1,130 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "55c8afde-9b18-4b6a-9ee5-33924bdb4f16",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# REST Inference"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2c004acc-13cd-4917-8480-592c7c2d623b",
+   "metadata": {},
+   "source": [
+    "## Setup\n",
+    "\n",
+    "Change the following variable settings to match your deployed model's *Inference endpoint*. For example: \n",
+    "\n",
+    "```\n",
+    "deployed_model_name = \"fraud\"\n",
+    "infer_endpoint = \"https://fraud-predictor-userx-workshop.apps.clusterx.sandboxx.opentlc.com\"\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0de65d02-84a6-4cff-882e-551cdd42b486",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "deployed_model_name = \"fraud\"\n",
+    "infer_endpoint = \"https://fraud-predictor-<project-name>.<cluster>.com\"\n",
+    "infer_url = f\"{infer_endpoint}/v2/models/{deployed_model_name}/infer\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d94f9ece-e9cf-44e2-a8a2-73160186aee8",
+   "metadata": {},
+   "source": [
+    "## Request Function\n",
+    "\n",
+    "Build and submit the REST request. \n",
+    "\n",
+    "Note: You submit the data in the same format that you used for an ONNX inference."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "54b9386f-683a-4880-b780-c40bec3ab9f8",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "\n",
+    "\n",
+    "def rest_request(data):\n",
+    "    json_data = {\n",
+    "        \"inputs\": [\n",
+    "            {\n",
+    "                \"name\": \"dense_input\",\n",
+    "                \"shape\": [1, 5],\n",
+    "                \"datatype\": \"FP32\",\n",
+    "                \"data\": data\n",
+    "            }\n",
+    "        ]\n",
+    "    }\n",
+    "\n",
+    "    response = requests.post(infer_url, json=json_data, verify=False)\n",
+    "    response_dict = response.json()\n",
+    "    return response_dict['outputs'][0]['data']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "45ad16ac-23da-48bd-9796-f8e4cacae981",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = [0.3111400080477545, 1.9459399775518593, 1.0, 0.0, 0.0]\n",
+    "prediction = rest_request(data)\n",
+    "prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1d66e0f7-4d4e-4879-bdf1-36b712432fd9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "threshhold = 0.995\n",
+    "\n",
+    "if (prediction[0] > threshhold):\n",
+    "    print('fraud')\n",
+    "else:\n",
+    "    print('not fraud')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.9",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}