diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb
index 83c502925b..5ab2370012 100644
--- a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb
+++ b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb
@@ -298,12 +298,16 @@
     "    reg_client.models.list(model_name)\n",
     ")  # list available versions of the model\n",
     "llama_model = None\n",
+    "hf_tgi = False  # whether the model supports the text-generation-inference (HF TGI) container\n",
     "\n",
     "if len(version_list) == 0:\n",
     "    raise Exception(f\"No model named {model_name} found in registry\")\n",
     "else:\n",
     "    model_version = version_list[0].version\n",
     "    llama_model = reg_client.models.get(model_name, model_version)\n",
+    "    if \"inference_supported_envs\" in llama_model.tags:\n",
+    "        if \"hf_tgi\" in llama_model.tags[\"inference_supported_envs\"]:\n",
+    "            hf_tgi = True\n",
     "    print(\n",
     "        f\"Using model name: {llama_model.name}, version: {llama_model.version}, id: {llama_model.id} for inferencing\"\n",
     "    )"
@@ -448,16 +452,25 @@
    "source": [
     "from azure.ai.ml.entities import (\n",
     "    OnlineRequestSettings,\n",
+    "    CodeConfiguration,\n",
     "    ManagedOnlineDeployment,\n",
     "    ProbeSettings,\n",
     ")\n",
     "\n",
+    "# For HF TGI inferencing, the scoring script is baked into the container\n",
+    "code_configuration = (\n",
+    "    CodeConfiguration(code=\"./llama-files/score/default/\", scoring_script=\"score.py\")\n",
+    "    if not hf_tgi\n",
+    "    else None\n",
+    ")\n",
+    "\n",
     "deployment = ManagedOnlineDeployment(\n",
     "    name=deployment_name,\n",
     "    endpoint_name=endpoint_name,\n",
     "    model=llama_model.id,\n",
     "    instance_type=sku_name,\n",
     "    instance_count=1,\n",
+    "    code_configuration=code_configuration,\n",
     "    environment_variables=deployment_env_vars,\n",
     "    request_settings=OnlineRequestSettings(request_timeout_ms=REQUEST_TIMEOUT_MS),\n",
     "    liveness_probe=ProbeSettings(\n",
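
The change reads the registry model's tags to learn which inference environments it supports, and when the HF TGI container is available it omits the scoring-script CodeConfiguration because the container serves requests itself. Below is a minimal standalone sketch of that decision, using an illustrative tags dict in place of the live registry lookup (the notebook reads llama_model.tags); the tag value shown is an assumption for illustration only.

from azure.ai.ml.entities import CodeConfiguration

# Illustrative tag value only; real values come from the registry model's tags.
tags = {"inference_supported_envs": "hf_tgi"}

# Same check as the diff: the tag string must mention "hf_tgi".
hf_tgi = "hf_tgi" in tags.get("inference_supported_envs", "")

# With HF TGI, the scoring logic ships inside the container, so no
# CodeConfiguration is attached; otherwise point at the notebook's score.py.
code_configuration = (
    CodeConfiguration(code="./llama-files/score/default/", scoring_script="score.py")
    if not hf_tgi
    else None
)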