diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-files/score/score.py b/sdk/python/foundation-models/system/inference/text-generation/llama-files/score/score.py
index df6bc39a8a..f64703ec0e 100644
--- a/sdk/python/foundation-models/system/inference/text-generation/llama-files/score/score.py
+++ b/sdk/python/foundation-models/system/inference/text-generation/llama-files/score/score.py
@@ -355,10 +355,37 @@ def get_parameter_type(sample_input_ex, sample_output_ex=None):
     model = load_model(model_path)
 
 
+def get_aacs_access_key():
+    key = os.environ.get("CONTENT_SAFETY_KEY")
+
+    if key:
+        return key
+
+    uai_client_id = os.environ.get("UAI_CLIENT_ID")
+    if not uai_client_id:
+        raise RuntimeError(
+            "Cannot get AACS access key, neither UAI_CLIENT_ID nor CONTENT_SAFETY_KEY is set, exiting..."
+        )
+
+    subscription_id = os.environ.get("SUBSCRIPTION_ID")
+    resource_group_name = os.environ.get("RESOURCE_GROUP_NAME")
+    aacs_account_name = os.environ.get("CONTENT_SAFETY_ACCOUNT_NAME")
+    from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient
+    from azure.identity import ManagedIdentityCredential
+
+    credential = ManagedIdentityCredential(client_id=uai_client_id)
+    cs_client = CognitiveServicesManagementClient(credential, subscription_id)
+    key = cs_client.accounts.list_keys(
+        resource_group_name=resource_group_name, account_name=aacs_account_name
+    ).key1
+
+    return key
+
+
 def init():
     global inputs_collector, outputs_collector, aacs_client
     endpoint = os.environ.get("CONTENT_SAFETY_ENDPOINT")
-    key = os.environ.get("CONTENT_SAFETY_KEY")
+    key = get_aacs_access_key()
 
     # Create an Content Safety client
     headers_policy = HeadersPolicy()
diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-prepare-uai.ipynb b/sdk/python/foundation-models/system/inference/text-generation/llama-prepare-uai.ipynb
new file mode 100644
index 0000000000..9885ecc7b9
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/text-generation/llama-prepare-uai.ipynb
@@ -0,0 +1,448 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prepare UAI account for Azure Content Safety enabled Llama 2 model deployment\n",
+    " "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Prerequisites"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 1.1 Assign variables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "uai_name = f\"llama-uai\"\n",
+    "\n",
+    "# The name of the AACS resource created in the deploy_llama notebook\n",
+    "# Leaving it blank will assign all AACS resources under the resource group to the UAI\n",
+    "aacs_name = \"\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 1.2 Get credential"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    # Check if given credential can get token successfully.\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential does not work\n",
+    "    credential = InteractiveBrowserCredential()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 1.3 Configure workspace"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "\n",
+    "try:\n",
+    "    ml_client = MLClient.from_config(credential=credential)\n",
+    "except Exception as ex:\n",
+    "    # enter details of your AML workspace\n",
+    "    subscription_id = \"\"\n",
+    "    resource_group = \"\"\n",
+    "    workspace_name = \"\"\n",
+    "\n",
+    "    # get a handle to the workspace\n",
+    "    ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n",
+    "\n",
+    "\n",
+    "subscription_id = ml_client.subscription_id\n",
+    "resource_group = ml_client.resource_group_name\n",
+    "workspace_name = ml_client.workspace_name\n",
+    "workspace_resource = ml_client.workspaces.get(workspace_name)\n",
+    "workspace_location = workspace_resource.location\n",
+    "\n",
+    "print(f\"Connected to workspace {workspace_name}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Create a Managed Identity for the Azure AI Content Safety enabled Llama 2 endpoint"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### 2.1 Get a handle to the ManagedServiceIdentityClient"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.mgmt.msi import ManagedServiceIdentityClient\n",
+    "from azure.mgmt.msi.models import Identity\n",
+    "\n",
+    "msi_client = ManagedServiceIdentityClient(\n",
+    "    subscription_id=subscription_id,\n",
+    "    credential=credential,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### 2.2 Create the User Assigned Identity:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "msi_client.user_assigned_identities.create_or_update(\n",
+    "    resource_group_name=resource_group,\n",
+    "    resource_name=uai_name,\n",
+    "    parameters=Identity(location=workspace_location),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### 2.3 Retrieve the identity object\n",
+    "We need to retrieve the identity object so that we can use it to deploy the Azure AI Content Safety enabled Llama 2 online endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "uai_identity = msi_client.user_assigned_identities.get(\n",
+    "    resource_group_name=resource_group,\n",
+    "    resource_name=uai_name,\n",
+    ")\n",
+    "uai_principal_id = uai_identity.principal_id\n",
+    "uai_client_id = uai_identity.client_id\n",
+    "uai_id = uai_identity.id\n",
+    "print(f\"UAI principal id: {uai_principal_id}\")\n",
+    "print(f\"UAI id: {uai_id}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 2.4 Grant appropriate roles to the UAI we created above.\n",
+    "Note: In order to successfully run the scripts in this step, you must have owner permission on the AACS resource and the Llama 2 endpoint, which we created in the previous steps."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### 2.4.1 Get an AuthorizationManagementClient to list Role Definitions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.mgmt.authorization import AuthorizationManagementClient\n",
+    "from azure.mgmt.authorization.v2022_04_01.models import (\n",
+    "    RoleAssignmentCreateParameters,\n",
+    ")\n",
+    "import uuid\n",
+    "\n",
+    "role_definition_client = AuthorizationManagementClient(\n",
+    "    credential=credential,\n",
+    "    subscription_id=subscription_id,\n",
+    "    api_version=\"2022-04-01\",\n",
+    ")\n",
+    "role_assignment_client = AuthorizationManagementClient(\n",
+    "    credential=credential,\n",
+    "    subscription_id=subscription_id,\n",
+    "    api_version=\"2022-04-01\",\n",
+    ")\n",
+    "\n",
+    "uai_role_check_list = {\n",
+    "    \"Cognitive Services Contributor\": {\n",
+    "        \"step\": \"2.4.3\",\n",
+    "        \"description\": \"assign the role Cognitive Services Contributor to the UAI on the Azure AI Content Safety resource.\",\n",
+    "    },\n",
+    "    \"AcrPull\": {\n",
+    "        \"step\": \"2.4.4\",\n",
+    "        \"description\": \"assign the role AcrPull to the UAI on the Azure Container Registry.\",\n",
+    "    },\n",
+    "    \"Storage Blob Data Reader\": {\n",
+    "        \"step\": \"2.4.5\",\n",
+    "        \"description\": \"assign the role Storage Blob Data Reader to the UAI on the Azure Storage account.\",\n",
+    "    },\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### 2.4.2 Define util function to assign access for UAI\n",
+    "The Cognitive Services Contributor role is required so that the UAI can list the access keys of the Azure Content Safety resource."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "role_name = \"Cognitive Services Contributor\" # minimum role required for listing AACS access keys\n",
+    "\n",
+    "\n",
+    "def assign_access_to_acs(scope):\n",
+    "    role_defs = role_definition_client.role_definitions.list(scope=scope)\n",
+    "    role_def = next((r for r in role_defs if r.role_name == role_name))\n",
+    "\n",
+    "    from azure.core.exceptions import ResourceExistsError\n",
+    "\n",
+    "    try:\n",
+    "        role_assignment_client.role_assignments.create(\n",
+    "            scope=scope,\n",
+    "            role_assignment_name=str(uuid.uuid4()),\n",
+    "            parameters=RoleAssignmentCreateParameters(\n",
+    "                role_definition_id=role_def.id,\n",
+    "                principal_id=uai_principal_id,\n",
+    "                principal_type=\"ServicePrincipal\",\n",
+    "            ),\n",
+    "        )\n",
+    "    except ResourceExistsError as ex:\n",
+    "        pass\n",
+    "    except Exception as ex:\n",
+    "        print(ex)\n",
+    "        raise ex\n",
+    "\n",
+    "    if role_name in uai_role_check_list:\n",
+    "        del uai_role_check_list[role_name]\n",
+    "    print(\n",
+    "        f\"Role assignment for {role_name} at the Azure AI Content Safety resource level completed.\"\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### 2.4.3 Grant the user identity access to the Azure Content Safety resource"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient\n",
+    "\n",
+    "acs_client = CognitiveServicesManagementClient(credential, subscription_id)\n",
+    "\n",
+    "\n",
+    "def find_acs(accounts):\n",
+    "    return next(x for x in accounts if x.kind == \"ContentSafety\")\n",
+    "\n",
+    "\n",
+    "if aacs_name == \"\":\n",
+    "    for acs_resource in acs_client.accounts.list_by_resource_group(resource_group):\n",
+    "        assign_access_to_acs(acs_resource.id)\n",
+    "else:\n",
+    "    acs_resource = acs_client.accounts.get(resource_group, aacs_name)\n",
+    "    assign_access_to_acs(acs_resource.id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### 2.4.4 Assign AcrPull at the workspace container registry scope\n",
+    "Since we will create the Azure AI Content Safety enabled Llama 2 endpoint with User Assigned Identity, the user managed identity must have Storage Blob Data Reader permission on the storage account for the workspace, and AcrPull permission on the Azure Container Registry (ACR) for the workspace. Make sure your User Assigned Identity has the right permission."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "container_registry = workspace_resource.container_registry\n",
+    "\n",
+    "role_name = \"AcrPull\"\n",
+    "\n",
+    "role_defs = role_definition_client.role_definitions.list(scope=container_registry)\n",
+    "role_def = next((r for r in role_defs if r.role_name == role_name))\n",
+    "\n",
+    "from azure.core.exceptions import ResourceExistsError\n",
+    "\n",
+    "try:\n",
+    "    role_assignment_client.role_assignments.create(\n",
+    "        scope=container_registry,\n",
+    "        role_assignment_name=str(uuid.uuid4()),\n",
+    "        parameters=RoleAssignmentCreateParameters(\n",
+    "            role_definition_id=role_def.id,\n",
+    "            principal_id=uai_principal_id,\n",
+    "            principal_type=\"ServicePrincipal\",\n",
+    "        ),\n",
+    "    )\n",
+    "except ResourceExistsError as ex:\n",
+    "    pass\n",
+    "except Exception as ex:\n",
+    "    print(ex)\n",
+    "    raise ex\n",
+    "\n",
+    "if role_name in uai_role_check_list:\n",
+    "    del uai_role_check_list[role_name]\n",
+    "print(\"Role assignment for AcrPull at the workspace container registry completed.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### 2.4.5 Assign Storage Blob Data Reader at the workspace storage account scope"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "role_name = \"Storage Blob Data Reader\"\n",
+    "blob_scope = workspace_resource.storage_account\n",
+    "\n",
+    "role_defs = role_definition_client.role_definitions.list(scope=blob_scope)\n",
+    "role_def = next((r for r in role_defs if r.role_name == role_name))\n",
+    "\n",
+    "from azure.core.exceptions import ResourceExistsError\n",
+    "\n",
+    "try:\n",
+    "    role_assignment_client.role_assignments.create(\n",
+    "        scope=blob_scope,\n",
+    "        role_assignment_name=str(uuid.uuid4()),\n",
+    "        parameters=RoleAssignmentCreateParameters(\n",
+    "            role_definition_id=role_def.id,\n",
+    "            principal_id=uai_principal_id,\n",
+    "            principal_type=\"ServicePrincipal\",\n",
+    "        ),\n",
+    "    )\n",
+    "except ResourceExistsError as ex:\n",
+    "    pass\n",
+    "except Exception as ex:\n",
+    "    print(ex)\n",
+    "    raise ex\n",
+    "\n",
+    "if role_name in uai_role_check_list:\n",
+    "    del uai_role_check_list[role_name]\n",
+    "print(\n",
+    "    \"Role assignment for `Storage Blob Data Reader` at the workspace storage account completed.\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### 2.4.6 Let's make sure we didn't miss anything in the previous steps. Please execute the following script to check:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check everything is properly done before creating the Azure AI Content Safety Enabled Llama 2 online endpoint\n",
+    "missing_steps = []\n",
+    "print(\n",
+    "    \"You selected UAI to deploy the Azure AI Content Safety Enabled Llama 2 online endpoint, checking if the UAI has the required roles assigned...\"\n",
+    ")\n",
+    "if uai_role_check_list:\n",
+    "    for key, value in uai_role_check_list.items():\n",
+    "        missing_steps.append(\n",
+    "            f'Please go to step {value[\"step\"]} to {value[\"description\"]}'\n",
+    "        )\n",
+    "\n",
+    "if missing_steps:\n",
+    "    print(\"Seems you missed some steps above.\")\n",
+    "    steps = \"\\n\".join(missing_steps)\n",
+    "    raise Exception(f\"Please complete the missing steps before proceeding:\\n{steps}\")\n",
+    "else:\n",
+    "    print(\n",
+    "        \"All steps are completed, proceeding to create the Azure AI Content Safety Enabled Llama 2 online endpoint...\\n\"\n",
+    "        f\"uai_id: {uai_id}\\n\"\n",
+    "        f\"uai_client_id: {uai_client_id}\"\n",
+    "    )"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-batch-deployment.ipynb b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-batch-deployment.ipynb
index f58d6179b7..bda48ee15b 100644
--- a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-batch-deployment.ipynb
+++ b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-batch-deployment.ipynb
@@ -10,7 +10,7 @@
     "### This notebook is under preview\n",
     "### The steps are:\n",
     "1. Create an __Azure AI Content Safety__ resource for moderating the request from user and response from the __Llama 2__ batch endpoint.\n",
-    "2. Create a new __Azure AI Content Safety__ enabled __Llama 2__ batch endpoint with a custom score.py which will integrate with the __Azure AI Content Safety__ resource to moderate the response from the __Llama 2__ model and the request from the user, but to make the custom score.py to sucessfully autheticated to the __Azure AI Content Safety__ resource, for batch inferencing is using __Environment variable__ to pass the access key of the __Azure AI Content Safety__ resource to the custom score.py via environment variable, then the custom score.py can use the key directly to access the Azure AI Content Safety resource, this option is less secure than the first option, if someone in your org has access to the endpoint, he/she can get the access key from the environment variable and use it to access the Azure AI Content Safety resource.\n",
+    "2. Create a new __Azure AI Content Safety__ enabled __Llama 2__ batch endpoint with a custom score.py which will integrate with the __Azure AI Content Safety__ resource to moderate the response from the __Llama 2__ model and the request from the user. To let the custom score.py successfully authenticate to the __Azure AI Content Safety__ resource, batch inferencing uses an __Environment variable__ to pass the access key of the __Azure AI Content Safety__ resource to the custom score.py, which can then use the key directly. This option is less secure: if someone in your org has access to the endpoint, they can get the access key from the environment variable and use it to access the Azure AI Content Safety resource.\n",
     " "
    ]
   },
diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb
index 2931aa683b..cf09d31b9e 100644
--- a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb
+++ b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb
@@ -10,8 +10,9 @@
     "### This notebook is under preview\n",
     "### The steps are:\n",
     "1. Create an __Azure AI Content Safety__ resource for moderating the request from user and response from the __Llama 2__ online endpoint.\n",
-    "2. Create a new __Azure AI Content Safety__ enabled __Llama 2__ online endpoint with a custom score.py which will integrate with the __Azure AI Content Safety__ resource to moderate the response from the __Llama 2__ model and the request from the user, but to make the custom score.py to sucessfully autheticated to the __Azure AI Content Safety__ resource, we have 2 options:\n",
-    "   1. __Environment variable__, simpler but less secure approach, is to just pass the access key of the __Azure AI Content Safety__ resource to the custom score.py via environment variable, then the custom score.py can use the key directly to access the Azure AI Content Safety resource, this option is less secure than the first option, if someone in your org has access to the endpoint, he/she can get the access key from the environment variable and use it to access the Azure AI Content Safety resource.\n",
+    "2. Create a new __Azure AI Content Safety__ enabled __Llama 2__ online endpoint with a custom score.py which will integrate with the __Azure AI Content Safety__ resource to moderate the response from the __Llama 2__ model and the request from the user. To let the custom score.py successfully authenticate to the __Azure AI Content Safety__ resource, we have 2 options:\n",
+    "   1. __UAI__, the recommended but more complex approach, is to create a User Assigned Identity (UAI) and assign the appropriate roles to it. The custom score.py can then obtain an access token for the UAI from the AAD server and use it to access the Azure AI Content Safety resource. Use [this notebook](llama-prepare-uai.ipynb) to create the UAI account for step 3 below.\n",
+    "   2. __Environment variable__, the simpler but less secure approach, is to just pass the access key of the Azure AI Content Safety resource to the custom score.py via an environment variable, so the custom score.py can use the key directly. This option is less secure than the first: if someone in your org has access to the endpoint, they can get the access key from the environment variable and use it to access the Azure AI Content Safety resource.\n",
     " "
    ]
   },
@@ -56,7 +57,10 @@
     "# The severity level that will trigger response be blocked\n",
     "# Please reference Azure AI content documentation for more details\n",
     "# https://learn.microsoft.com/en-us/azure/cognitive-services/content-safety/concepts/harm-categories\n",
-    "content_severity_threshold = \"2\""
+    "content_severity_threshold = \"2\"\n",
+    "\n",
+    "# UAI to be used for the endpoint if you choose UAI as the authentication method\n",
+    "uai_name = \"\"  # defaults to \"llama-uai\" in the prepare UAI notebook"
    ]
   },
@@ -249,6 +253,10 @@
     "\n",
     "aacs_endpoint = aacs.properties.endpoint\n",
     "aacs_resource_id = aacs.id\n",
+    "aacs_name = aacs.name\n",
+    "print(\n",
+    "    f\"AACS name is {aacs.name}, use this name in the UAI preparation notebook to create the UAI.\"\n",
+    ")\n",
     "print(f\"AACS endpoint is {aacs_endpoint}\")\n",
     "print(f\"AACS ResourceId is {aacs_resource_id}\")\n",
     "\n",
@@ -299,12 +307,40 @@
     "    )"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 3.2 Check if UAI is used"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "uai_id = \"\"\n",
+    "uai_client_id = \"\"\n",
+    "if uai_name != \"\":\n",
+    "    from azure.mgmt.msi import ManagedServiceIdentityClient\n",
+    "    from azure.mgmt.msi.models import Identity\n",
+    "\n",
+    "    msi_client = ManagedServiceIdentityClient(\n",
+    "        subscription_id=subscription_id,\n",
+    "        credential=credential,\n",
+    "    )\n",
+    "    uai_resource = msi_client.user_assigned_identities.get(resource_group, uai_name)\n",
+    "    uai_id = uai_resource.id\n",
+    "    uai_client_id = uai_resource.client_id"
+   ]
+  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "#### 3.2 Create Llama 2 online endpoint\n",
+    "#### 3.3 Create Llama 2 online endpoint\n",
     "This step may take a few minutes."
    ]
   },
@@ -314,7 +350,11 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from azure.ai.ml.entities import ManagedOnlineEndpoint\n",
+    "from azure.ai.ml.entities import (\n",
+    "    ManagedOnlineEndpoint,\n",
+    "    IdentityConfiguration,\n",
+    "    ManagedIdentityConfiguration,\n",
+    ")\n",
     "\n",
     "# Check if the endpoint already exists in the workspace\n",
     "try:\n",
@@ -325,7 +365,14 @@
     "\n",
     "    # Define the endpoint\n",
     "    endpoint = ManagedOnlineEndpoint(\n",
-    "        name=endpoint_name, description=\"Test endpoint for model\"\n",
+    "        name=endpoint_name,\n",
+    "        description=\"Test endpoint for model\",\n",
+    "        identity=IdentityConfiguration(\n",
+    "            type=\"user_assigned\",\n",
+    "            user_assigned_identities=[ManagedIdentityConfiguration(resource_id=uai_id)],\n",
+    "        )\n",
+    "        if uai_id != \"\"\n",
+    "        else None,\n",
     "    )\n",
     "\n",
     "    # Trigger the endpoint creation\n",
@@ -343,7 +390,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "##### 3.3 Deploy Llama 2 model\n",
+    "##### 3.4 Deploy Llama 2 model\n",
     "This step may take a few minutes."
    ]
   },
@@ -372,9 +419,13 @@
     "        code=\"./llama-files/score\", scoring_script=\"score.py\"\n",
     "    ),\n",
     "    environment_variables={\n",
+    "        \"CONTENT_SAFETY_ACCOUNT_NAME\": aacs_name,\n",
     "        \"CONTENT_SAFETY_ENDPOINT\": aacs_endpoint,\n",
-    "        \"CONTENT_SAFETY_KEY\": aacs_access_key,\n",
+    "        \"CONTENT_SAFETY_KEY\": aacs_access_key if uai_client_id == \"\" else None,\n",
     "        \"CONTENT_SAFETY_THRESHOLD\": content_severity_threshold,\n",
+    "        \"SUBSCRIPTION_ID\": subscription_id,\n",
+    "        \"RESOURCE_GROUP_NAME\": resource_group,\n",
+    "        \"UAI_CLIENT_ID\": uai_client_id,\n",
     "    },\n",
     "    request_settings=OnlineRequestSettings(request_timeout_ms=90000),\n",
     "    liveness_probe=ProbeSettings(\n",