diff --git a/01-ai-core-azure-openai-proxy/proxy.ipynb b/01-ai-core-azure-openai-proxy/proxy.ipynb index 7ce21d7..ea53a69 100644 --- a/01-ai-core-azure-openai-proxy/proxy.ipynb +++ b/01-ai-core-azure-openai-proxy/proxy.ipynb @@ -1,503 +1,529 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Install AI Core Python SDK" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "!pip install ai-core-sdk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import requests\n", - "import time\n", - "import yaml\n", - "from IPython.display import clear_output\n", - "from pprint import pprint\n", - "\n", - "from ai_core_sdk.models import ParameterBinding, Status" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Setup\n", - "aic_service_key_path = \"./resources/aic_service_key.json\"\n", - "git_setup_file_path = \"./resources/git_setup.json\"\n", - "docker_secret_file_path = \"./resources/docker_secret.json\"\n", - "env_file_path = \"./resources/env.json\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Connect to your AI Core instance" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load Library\n", - "with open(aic_service_key_path) as ask:\n", - " aic_service_key = json.load(ask)\n", - "\n", - "from ai_core_sdk.ai_core_v2_client import AICoreV2Client\n", - "\n", - "# Create Connection\n", - "ai_core_client = AICoreV2Client(\n", - " base_url = aic_service_key[\"serviceurls\"][\"AI_API_URL\"] + \"/v2\", # The present AI API version is 2\n", - " auth_url= aic_service_key[\"url\"] + \"/oauth/token\",\n", - " client_id = aic_service_key[\"clientid\"],\n", - " client_secret = aic_service_key[\"clientsecret\"]\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "*Returns no output*" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Test connection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "response = ai_core_client.repositories.query()\n", - "print(response.count) # Should return integer value else your values in above step are incorrect" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Onboard the Git repository that contains the templates" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# WARNING: Refrain from onboarding again if previously onboarded\n", - "# else you will get AIAPIServerException 409\n", - "\n", - "with open(git_setup_file_path) as gs:\n", - "\t\tgit_key = json.load(gs)\n", - "\n", - "ai_core_client.repositories.create(\n", - " name = \"azure-openai-aicore\",\n", - " url = f\"https://github.com/{git_key['username']}/azure-openai-aicore-cap-api\", # Forked repo\n", - " username = git_key[\"username\"],\n", - " password = git_key[\"password\"]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Check onboarded repositories\n", - "response = ai_core_client.repositories.query()\n", - "#\n", - "for repository in response.resources:\n", - " print('Name:', repository.name)\n", - " print('URL:', repository.url)\n", - " print('Status:', repository.status)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "*Expected Output*\n", - "\n", - "```\n", - "...\n", - "Name: azure-openai-aicore\n", - "URL: https://github.com/john/azure-openai-aicore-cap-api\n", - "Status: RepositoryStatus.COMPLETED\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Register an application" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# WARNING: Run only once\n", - "\n", - "ai_core_client.applications.create(\n", - " repository_url = f\"https://github.com/{git_key['username']}/azure-openai-aicore-cap-api\",\n", - " path = \"01-ai-core-azure-openai-proxy/scenario\", # Scan this folder for instruction YAML files\n", - " revision = \"HEAD\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# List scenarios scanned by the application created above\n", - "response = ai_core_client.scenario.query(resource_group='default')\n", - "\n", - "for scenario in response.resources:\n", - " print(scenario.__dict__)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "*Expected Output*\n", - "\n", - "```\n", - "...\n", - "{'id': 'azure-openai-proxy', 'name': 'Azure OpenAI Proxy', 'description': 'Azure OpenAI Proxy', 'labels': None, ...)}\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4 Docker Hub (optional)\n", - "#### 4.1 Register Docker secret on SAP BTP, AI Core (optional)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with open(docker_secret_file_path) as dsf:\n", - " docker_secret = json.load(dsf)\n", - "\n", - "\n", - "response = ai_core_client.docker_registry_secrets.create(\n", - " name = docker_secret[\"name\"],\n", - " data = docker_secret[\"data\"]\n", - ")\n", - "\n", - "print(response.__dict__)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "*Expected Output*\n", - "\n", - "```\n", - "{'message': 'secret has been created'}\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 4.2 Build and push Docker image (optional)\n", - "```\n", - "$ cd proxy\n", - "$ docker build -t {DOCKER_USERNAME}/azure-openai-proxy .\n", - "$ docker push {DOCKER_USERNAME}/azure-openai-proxy\n", - "```\n", - "\n", - "THROUGH THE DOCKER CLI. \n", - "See: https://developers.sap.com/tutorials/ai-core-aiapi-clientsdk-workflows.html#f824a41d-efe8-4883-8238-caef4ac5f789" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Create a resource group (Optional)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# For Free Tier AI Core Serice:\n", - "# you will not be able to create a new resource group.\n", - "# resource group named `default` exists in all systems.\n", - "# cell execution does not impact existing contents\n", - "#\n", - "# For paid AI Core service: \n", - "# IF you wish execute this step\n", - "# You are NOT REQUIRED TO RE-EXECUTE or Redo previous completed steps.\n", - "# Ensure that in the steps that follows modify with your resource group name\n", - "\n", - "resource_group_id = \"default\" # For free tier; `default` exsits in all systems\n", - "# resource_group_id = \"my-openai-proxy-ns\" # For paid account you can create a namespace aka resource group\n", - " # Other steps \n", - "\n", - "response = ai_core_client.resource_groups.create(resource_group_id = resource_group_id)\n", - "print(response.__dict__)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Create configuration to serve the model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with open(env_file_path) as efp:\n", - " env_val = json.load(efp)\n", - "\n", - "OPENAI_API_BASE = env_val[\"OPENAI_API_BASE\"]\n", - "OPENAI_API_KEY = env_val[\"OPENAI_API_KEY\"]\n", - "DOCKER_NAMESPACE = env_val[\"DOCKER_NAMESPACE\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# No modification required in below snippet\n", - "response = ai_core_client.configuration.create(\n", - " name = \"azure-proxy-serve\",\n", - " scenario_id = \"azure-openai-proxy\",\n", - " executable_id = \"azure-openai-proxy\",\n", - " input_artifact_bindings = [],\n", - " parameter_bindings = [\n", - " ParameterBinding(key = \"OPENAI_API_BASE\", value = OPENAI_API_BASE),\n", - " ParameterBinding(key = \"OPENAI_API_KEY\", value = OPENAI_API_KEY), \n", - " ParameterBinding(key = \"DOCKER_NAMESPACE\", value = DOCKER_NAMESPACE)\n", - " ],\n", - " resource_group = resource_group_id\n", - ")\n", - "\n", - "\n", - "serve_config_resp = response\n", - "print(response.__dict__)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. Actually serve the proxy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Start proxy\n", - "response = ai_core_client.deployment.create(\n", - " configuration_id=serve_config_resp.id,\n", - " resource_group=resource_group_id\n", - ")\n", - "\n", - "deployment_resp = response\n", - "print(response.__dict__)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Poll deployment status.\n", - "# No modification required in below snipet\n", - "status = None\n", - "while status != Status.RUNNING and status != Status.DEAD:\n", - " time.sleep(5)\n", - " clear_output(wait=True)\n", - " # Get Status\n", - " #\n", - " deployment = response = ai_core_client.deployment.get(\n", - " deployment_id=deployment_resp.id,\n", - " resource_group=resource_group_id\n", - " )\n", - " status = deployment.status\n", - " print(\"...... deployment status ......\", flush=True)\n", - " print(deployment.status)\n", - " pprint(deployment.status_details)\n", - "\n", - " if deployment.status == Status.RUNNING:\n", - " print(f\"Deployment with {deployment_resp.id} complete!\")\n", - "\n", - "# Allow some time for deployment URL to get ready.\n", - "time.sleep(10)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 8. Do an inference request" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "endpoint = f\"{deployment.deployment_url}/v2/envs\"\n", - "headers = {\"Authorization\": ai_core_client.rest_client.get_token(),\n", - " \"ai-resource-group\": resource_group_id,\n", - " \"Content-Type\": \"application/json\"}\n", - "response = requests.get(endpoint, headers=headers)\n", - "\n", - "legacy_davinci = False # set True if you have a davinci model deployment on Azure OpenAI Services\n", - "if legacy_davinci:\n", - " body = {\n", - " \"engine\": \"\", # The deployment name you chose when you deployed the ChatGPT or GPT-4 model.\n", - " # For information of deployment creation and name Refer article https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal\n", - " \"prompt\": \"Classify the following news article into 1 of the following categories: categories: [Business, Tech, Politics, Sport, Entertainment]\\n\\nnews article: Donna Steffensen Is Cooking Up a New Kind of Perfection. The Internet’s most beloved cooking guru has a buzzy new book and a fresh new perspective:\\n\\nClassified category:\",\n", - " \"max_tokens\": 60,\n", - " \"temperature\": 0,\n", - " \"frequency_penalty\": 0,\n", - " \"presence_penalty\": 0,\n", - " \"top_p\": 1,\n", - " \"best_of\": 1,\n", - " \"stop\": \"null\"\n", - " }\n", - " endpoint = f\"{deployment.deployment_url}/v2/completion\"\n", - "else:\n", - " body = {\n", - " \"engine\": \"\", # include your engine from a deployment of an Azure OpenAI services model\n", - " \"prompt\": \"Classify the following news article into 1 of the following categories: categories: [Business, Tech, Politics, Sport, Entertainment]\\n\\nnews article: Donna Steffensen Is Cooking Up a New Kind of Perfection. The Internet’s most beloved cooking guru has a buzzy new book and a fresh new perspective:\\n\\nClassified category:\",\n", - " \"max_tokens\": 60,\n", - " \"temperature\": 0,\n", - " \"frequency_penalty\": 0,\n", - " \"presence_penalty\": 0,\n", - " \"stop\": \"null\"\n", - " }\n", - " endpoint = f\"{deployment.deployment_url}/v2/chat-completion\"\n", - "\n", - "headers = {\"Authorization\": ai_core_client.rest_client.get_token(),\n", - " \"ai-resource-group\": resource_group_id,\n", - " \"Content-Type\": \"application/json\"}\n", - "response = requests.post(endpoint, headers=headers, json=body)\n", - "\n", - "print(\"Inference result:\", response.json())\n", - "pprint(vars(response))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 9. Kill deployment (optional)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "delete_resp = ai_core_client.deployment.modify(deployment_resp.id,\n", - " target_status=Status.STOPPED,\n", - " resource_group=resource_group_id)\n", - "status = None\n", - "while status != Status.STOPPED:\n", - " time.sleep(5)\n", - " clear_output(wait=True)\n", - " deployment = ai_core_client.deployment.get(deployment_resp.id, resource_group=resource_group_id)\n", - " status = deployment.status\n", - " print(\"...... killing deployment ......\", flush=True)\n", - " print(f\"Deployment status: {deployment.status}\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.6" - }, - "vscode": { - "interpreter": { - "hash": "c30f2af5f468e7f5b45bcc30fca5f4886c90d54777aed916ed5f6294dfb24bf2" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install AI Core Python SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "!pip install ai-core-sdk" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import requests\n", + "import time\n", + "import yaml\n", + "from IPython.display import clear_output\n", + "from pprint import pprint\n", + "\n", + "from ai_core_sdk.models import ParameterBinding, Status" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Setup\n", + "aic_service_key_path = \"./resources/aic_service_key.json\"\n", + "git_setup_file_path = \"./resources/git_setup.json\"\n", + "docker_secret_file_path = \"./resources/docker_secret.json\"\n", + "env_file_path = \"./resources/env.json\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Connect to your AI Core instance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load Library\n", + "with open(aic_service_key_path) as ask:\n", + " aic_service_key = json.load(ask)\n", + "\n", + "from ai_core_sdk.ai_core_v2_client import AICoreV2Client\n", + "\n", + "# Create Connection\n", + "ai_core_client = AICoreV2Client(\n", + " base_url = aic_service_key[\"serviceurls\"][\"AI_API_URL\"] + \"/v2\", # The present AI API version is 2\n", + " auth_url= aic_service_key[\"url\"] + \"/oauth/token\",\n", + " client_id = aic_service_key[\"clientid\"],\n", + " client_secret = aic_service_key[\"clientsecret\"]\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*Returns no output*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Test connection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = ai_core_client.repositories.query()\n", + "print(response.count) # Should return integer value else your values in above step are incorrect" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Onboard the Git repository that contains the templates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# WARNING: Refrain from onboarding again if previously onboarded\n", + "# else you will get AIAPIServerException 409\n", + "\n", + "with open(git_setup_file_path) as gs:\n", + "\t\tgit_key = json.load(gs)\n", + "\n", + "ai_core_client.repositories.create(\n", + " name = \"azure-openai-aicore\",\n", + " url = f\"https://github.com/{git_key['username']}/azure-openai-aicore-cap-api\", # Forked repo\n", + " username = git_key[\"username\"],\n", + " password = git_key[\"password\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Check onboarded repositories\n", + "response = ai_core_client.repositories.query()\n", + "#\n", + "for repository in response.resources:\n", + " print('Name:', repository.name)\n", + " print('URL:', repository.url)\n", + " print('Status:', repository.status)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*Expected Output*\n", + "\n", + "```\n", + "...\n", + "Name: azure-openai-aicore\n", + "URL: https://github.com/john/azure-openai-aicore-cap-api\n", + "Status: RepositoryStatus.COMPLETED\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Register an application" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# WARNING: Run only once\n", + "\n", + "ai_core_client.applications.create(\n", + " repository_url = f\"https://github.com/{git_key['username']}/azure-openai-aicore-cap-api\",\n", + " path = \"01-ai-core-azure-openai-proxy/scenario\", # Scan this folder for instruction YAML files\n", + " revision = \"HEAD\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check application status\n", + "response = ai_core_client.applications.get_status(\"azure-openai-aicore-cap-api\") # repo name\n", + "\n", + "print(response.message) # application sync message\n", + " # If response is `rpc error: code = Unknown desc = authorization failed`\n", + " # then you Git PAT(used for onboarding Git is incorrect)\n", + " # to resolve offboard GitHub and onboard again." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*Expected Output*\n", + "\n", + "```\n", + "successfully synced (all tasks run)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# List scenarios scanned by the application created above\n", + "response = ai_core_client.scenario.query(resource_group='default')\n", + "\n", + "for scenario in response.resources:\n", + " print(scenario.__dict__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*Expected Output*\n", + "\n", + "```\n", + "...\n", + "{'id': 'azure-openai-proxy', 'name': 'Azure OpenAI Proxy', 'description': 'Azure OpenAI Proxy', 'labels': None, ...)}\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4 Docker Hub (optional)\n", + "#### 4.1 Register Docker secret on SAP BTP, AI Core (optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(docker_secret_file_path) as dsf:\n", + " docker_secret = json.load(dsf)\n", + "\n", + "\n", + "response = ai_core_client.docker_registry_secrets.create(\n", + " name = docker_secret[\"name\"],\n", + " data = docker_secret[\"data\"]\n", + ")\n", + "\n", + "print(response.__dict__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*Expected Output*\n", + "\n", + "```\n", + "{'message': 'secret has been created'}\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4.2 Build and push Docker image (optional)\n", + "```\n", + "$ cd proxy\n", + "$ docker build -t {DOCKER_USERNAME}/azure-openai-proxy .\n", + "$ docker push {DOCKER_USERNAME}/azure-openai-proxy\n", + "```\n", + "\n", + "THROUGH THE DOCKER CLI. \n", + "See: https://developers.sap.com/tutorials/ai-core-aiapi-clientsdk-workflows.html#f824a41d-efe8-4883-8238-caef4ac5f789" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Create a resource group (Optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# For Free Tier AI Core Serice:\n", + "# you will not be able to create a new resource group.\n", + "# resource group named `default` exists in all systems.\n", + "# cell execution does not impact existing contents\n", + "#\n", + "# For paid AI Core service: \n", + "# IF you wish execute this step\n", + "# You are NOT REQUIRED TO RE-EXECUTE or Redo previous completed steps.\n", + "# Ensure that in the steps that follows modify with your resource group name\n", + "\n", + "resource_group_id = \"default\" # For free tier; `default` exsits in all systems\n", + "# resource_group_id = \"my-openai-proxy-ns\" # For paid account you can create a namespace aka resource group\n", + " # Other steps \n", + "\n", + "response = ai_core_client.resource_groups.create(resource_group_id = resource_group_id)\n", + "print(response.__dict__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Create configuration to serve the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(env_file_path) as efp:\n", + " env_val = json.load(efp)\n", + "\n", + "OPENAI_API_BASE = env_val[\"OPENAI_API_BASE\"]\n", + "OPENAI_API_KEY = env_val[\"OPENAI_API_KEY\"]\n", + "DOCKER_NAMESPACE = env_val[\"DOCKER_NAMESPACE\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# No modification required in below snippet\n", + "response = ai_core_client.configuration.create(\n", + " name = \"azure-proxy-serve\",\n", + " scenario_id = \"azure-openai-proxy\",\n", + " executable_id = \"azure-openai-proxy\",\n", + " input_artifact_bindings = [],\n", + " parameter_bindings = [\n", + " ParameterBinding(key = \"OPENAI_API_BASE\", value = OPENAI_API_BASE),\n", + " ParameterBinding(key = \"OPENAI_API_KEY\", value = OPENAI_API_KEY), \n", + " ParameterBinding(key = \"DOCKER_NAMESPACE\", value = DOCKER_NAMESPACE)\n", + " ],\n", + " resource_group = resource_group_id\n", + ")\n", + "\n", + "\n", + "serve_config_resp = response\n", + "print(response.__dict__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7. Actually serve the proxy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Start proxy\n", + "response = ai_core_client.deployment.create(\n", + " configuration_id=serve_config_resp.id,\n", + " resource_group=resource_group_id\n", + ")\n", + "\n", + "deployment_resp = response\n", + "print(response.__dict__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Poll deployment status.\n", + "# No modification required in below snipet\n", + "status = None\n", + "while status != Status.RUNNING and status != Status.DEAD:\n", + " time.sleep(5)\n", + " clear_output(wait=True)\n", + " # Get Status\n", + " #\n", + " deployment = response = ai_core_client.deployment.get(\n", + " deployment_id=deployment_resp.id,\n", + " resource_group=resource_group_id\n", + " )\n", + " status = deployment.status\n", + " print(\"...... deployment status ......\", flush=True)\n", + " print(deployment.status)\n", + " pprint(deployment.status_details)\n", + "\n", + " if deployment.status == Status.RUNNING:\n", + " print(f\"Deployment with {deployment_resp.id} complete!\")\n", + "\n", + "# Allow some time for deployment URL to get ready.\n", + "time.sleep(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 8. Do an inference request" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "endpoint = f\"{deployment.deployment_url}/v2/envs\"\n", + "headers = {\"Authorization\": ai_core_client.rest_client.get_token(),\n", + " \"ai-resource-group\": resource_group_id,\n", + " \"Content-Type\": \"application/json\"}\n", + "response = requests.get(endpoint, headers=headers)\n", + "\n", + "legacy_davinci = False # set True if you have a davinci model deployment on Azure OpenAI Services\n", + "if legacy_davinci:\n", + " body = {\n", + " \"engine\": \"\", # The deployment name you chose when you deployed the ChatGPT or GPT-4 model.\n", + " # For information of deployment creation and name Refer article https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal\n", + " \"prompt\": \"Classify the following news article into 1 of the following categories: categories: [Business, Tech, Politics, Sport, Entertainment]\\n\\nnews article: Donna Steffensen Is Cooking Up a New Kind of Perfection. The Internet’s most beloved cooking guru has a buzzy new book and a fresh new perspective:\\n\\nClassified category:\",\n", + " \"max_tokens\": 60,\n", + " \"temperature\": 0,\n", + " \"frequency_penalty\": 0,\n", + " \"presence_penalty\": 0,\n", + " \"top_p\": 1,\n", + " \"best_of\": 1,\n", + " \"stop\": \"null\"\n", + " }\n", + " endpoint = f\"{deployment.deployment_url}/v2/completion\"\n", + "else:\n", + " body = {\n", + " \"engine\": \"\", # include your engine from a deployment of an Azure OpenAI services model\n", + " \"prompt\": \"Classify the following news article into 1 of the following categories: categories: [Business, Tech, Politics, Sport, Entertainment]\\n\\nnews article: Donna Steffensen Is Cooking Up a New Kind of Perfection. The Internet’s most beloved cooking guru has a buzzy new book and a fresh new perspective:\\n\\nClassified category:\",\n", + " \"max_tokens\": 60,\n", + " \"temperature\": 0,\n", + " \"frequency_penalty\": 0,\n", + " \"presence_penalty\": 0,\n", + " \"stop\": \"null\"\n", + " }\n", + " endpoint = f\"{deployment.deployment_url}/v2/chat-completion\"\n", + "\n", + "headers = {\"Authorization\": ai_core_client.rest_client.get_token(),\n", + " \"ai-resource-group\": resource_group_id,\n", + " \"Content-Type\": \"application/json\"}\n", + "response = requests.post(endpoint, headers=headers, json=body)\n", + "\n", + "print(\"Inference result:\", response.json())\n", + "pprint(vars(response))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 9. Kill deployment (optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "delete_resp = ai_core_client.deployment.modify(deployment_resp.id,\n", + " target_status=Status.STOPPED,\n", + " resource_group=resource_group_id)\n", + "status = None\n", + "while status != Status.STOPPED:\n", + " time.sleep(5)\n", + " clear_output(wait=True)\n", + " deployment = ai_core_client.deployment.get(deployment_resp.id, resource_group=resource_group_id)\n", + " status = deployment.status\n", + " print(\"...... killing deployment ......\", flush=True)\n", + " print(f\"Deployment status: {deployment.status}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "vscode": { + "interpreter": { + "hash": "c30f2af5f468e7f5b45bcc30fca5f4886c90d54777aed916ed5f6294dfb24bf2" } - \ No newline at end of file + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}