diff --git a/sdk/python/jobs/finetuning/standalone/chat-completion/chat_completion_with_model_as_service.ipynb b/sdk/python/jobs/finetuning/standalone/chat-completion/chat_completion_with_model_as_service.ipynb index ecba2ed26a..7a5afdf8c4 100644 --- a/sdk/python/jobs/finetuning/standalone/chat-completion/chat_completion_with_model_as_service.ipynb +++ b/sdk/python/jobs/finetuning/standalone/chat-completion/chat_completion_with_model_as_service.ipynb @@ -110,9 +110,9 @@ "source": [ "### 2. Pick a foundation model to fine tune\n", "\n", - "`Phi-3-mini-4k-instruct` is a 3.8B parameters, lightweight, state-of-the-art open model built upon datasets used for Phi-2. The model belongs to the Phi-3 model family, and the Mini version comes in two variants 4K and 128K which is the context length (in tokens) it can support, we need to finetune the model for our specific purpose in order to use it. You can browse these models in the Model Catalog in the AzureML Studio, filtering by the `chat-completion` task. In this example, we use the `Phi-3-mini-4k-instruct` model. If you have opened this notebook for a different model, replace the model name and version accordingly.\n", + "`Phi-3-mini-4k-instruct` is a 3.8B parameters, lightweight, state-of-the-art open model built upon datasets used for Phi-2. The model belongs to the Phi-3 model family, and the Mini version comes in two variants 4K and 128K which is the context length (in tokens) it can support. You can browse these models in the Model Catalog in the Azure AI Studio, filtering by the `chat-completion` task. In this example, we use the `Phi-3-mini-4k-instruct` model. If you have opened this notebook for a different model, replace the model name and version accordingly.\n", "\n", - "Note the model id property of the model. This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in model details page in AzureML Studio Model Catalog." + "Note the model id property of the model. 
This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in model details page in Azure AI Studio Model Catalog." ] }, { @@ -121,7 +121,7 @@ "metadata": {}, "outputs": [], "source": [ - "model_name = \"Meta-Llama-3.1-8B-Instruct\" # \"Phi-3-mini-4k-instruct\"\n", + "model_name = \"Phi-3-mini-4k-instruct\"  # \"Meta-Llama-3.1-8B-Instruct\"\n", "foundation_model = registry_ml_client.models.get(model_name, label=\"latest\")\n", "print(\n", " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n", @@ -148,8 +148,8 @@ "source": [ "### 3. Prepare data\n", "\n", - "- The [download-dataset.py](./download-dataset.py) is used to download the ultrachat_200k dataset and transform the dataset into finetune pipeline component consumable format. Also as the dataset is large, hence we here have only part of the dataset.\n", - "- Running the below script only downloads 5% of the data. This can be increased by changing `dataset_split_pc` parameter to desired percenetage.\n", + "- The [download-dataset.py](./download-dataset.py) is used to download the ultrachat_200k dataset and transform the dataset into the format expected by the model. As the dataset is large, we use only part of it here.\n", + "- Running the below script downloads only 1% of the data because the dataset is very large. 
This can be increased by changing `dataset_split_pc` parameter to desired percentage.\n", "\n", "**Note** : Some language models have different language codes and hence the column names in the dataset should reflect the same.\n", "\n", @@ -238,7 +238,7 @@ "dataset_version = \"1\"\n", "train_dataset_name = f\"{dataset_dir}_train\"\n", "try:\n", - " train_data_created = workspace_ml_client.data.get(\n", + " train_data_asset = workspace_ml_client.data.get(\n", " train_dataset_name, version=dataset_version\n", " )\n", " print(f\"Dataset {train_dataset_name} already exists\")\n", @@ -251,7 +251,7 @@ " name=train_dataset_name,\n", " version=\"1\",\n", " )\n", - " train_data_created = workspace_ml_client.data.create_or_update(train_data)" + " train_data_asset = workspace_ml_client.data.create_or_update(train_data)" ] }, { @@ -265,7 +265,7 @@ "dataset_version = \"1\"\n", "validation_dataset_name = f\"{dataset_dir}_validation\"\n", "try:\n", - " validation_data_created = workspace_ml_client.data.get(\n", + " validation_data_asset = workspace_ml_client.data.get(\n", " validation_dataset_name, version=dataset_version\n", " )\n", " print(f\"Dataset {validation_dataset_name} already exists\")\n", @@ -278,7 +278,7 @@ " name=validation_dataset_name,\n", " version=\"1\",\n", " )\n", - " validation_data_created = workspace_ml_client.data.create_or_update(validation_data)" + " validation_data_asset = workspace_ml_client.data.create_or_update(validation_data)" ] }, { @@ -290,12 +290,12 @@ "from azure.ai.ml.entities._inputs_outputs import Input\n", "\n", "training_data = Input(\n", - " type=train_data_created.type,\n", - " path=f\"azureml://locations/{workspace.location}/workspaces/{workspace._workspace_id}/data/{train_data_created.name}/versions/{train_data_created.version}\",\n", + " type=train_data_asset.type,\n", + " path=f\"azureml://locations/{workspace.location}/workspaces/{workspace._workspace_id}/data/{train_data_asset.name}/versions/{train_data_asset.version}\",\n", )\n", 
"validation_data = Input(\n", - " type=validation_data_created.type,\n", - " path=f\"azureml://locations/{workspace.location}/workspaces/{workspace._workspace_id}/data/{validation_data_created.name}/versions/{validation_data_created.version}\",\n", + " type=validation_data_asset.type,\n", + " path=f\"azureml://locations/{workspace.location}/workspaces/{workspace._workspace_id}/data/{validation_data_asset.name}/versions/{validation_data_asset.version}\",\n", ")" ] }, @@ -313,7 +313,7 @@ "metadata": {}, "source": [ "##### Create marketplace subscription for 3P models\n", - "Note: Skip this step for 1P models that are offered on Azure. Example: Phi family of models" + "Note: Skip this step for 1P (Microsoft) models that are offered on Azure. Example: Phi family of models" ] }, { @@ -365,8 +365,9 @@ "\n", "1. `model` - Base model to finetune.\n", "2. `training_data` - Training data for finetuning the base model.\n", - "3. `task` - FineTuning task to perform. eg. TEXT_COMPLETION for text-generation/text-generation finetuning jobs.\n", - "4. `outputs`- Output registered model name.\n", + "3. `validation_data` - Validation data for finetuning the base model.\n", + "4. `task` - FineTuning task to perform. eg. TEXT_COMPLETION for text-generation/text-generation finetuning jobs.\n", + "5. `outputs` - Output registered model name.\n", "\n", "##### Following parameters are optional:\n", "\n",