diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index a85adac19b..4424c29a77 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -243,6 +243,7 @@ sdk/python/foundation-models/cohere/command_tools-langchain.ipynb @stewart-co @k /sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/train/train_component.py @sdgilley @msakande @Blackmist @ssalgadodev @lgayhardt @fbsolo-ms1 /sdk/python/using-mlflow/deploy/environment/conda.yaml @sdgilley @msakande @Blackmist @ssalgadodev @lgayhardt @fbsolo-ms1 /sdk/python/using-mlflow/deploy/model/conda.yaml @sdgilley @msakande @Blackmist @ssalgadodev @lgayhardt @fbsolo-ms1 +/sdk/python/jobs/finetuning/standalone/chat-completion/chat_completion_with_model_as_service.ipynb @sagarsumant @srsaggam /setup/setup-ci/install-pip-package.sh @sdgilley @msakande @Blackmist @ssalgadodev @lgayhardt @fbsolo-ms1 /setup/setup-repo/azure-github.sh @sdgilley @msakande @Blackmist @ssalgadodev @lgayhardt @fbsolo-ms1 /tutorials/get-started-notebooks/workstation_env.yml @sdgilley @msakande @Blackmist @ssalgadodev @lgayhardt @fbsolo-ms1 diff --git a/sdk/python/jobs/finetuning/standalone/chat-completion/chat_completion_with_model_as_service.ipynb b/sdk/python/jobs/finetuning/standalone/chat-completion/chat_completion_with_model_as_service.ipynb new file mode 100644 index 0000000000..d8f9bebf7b --- /dev/null +++ b/sdk/python/jobs/finetuning/standalone/chat-completion/chat_completion_with_model_as_service.ipynb @@ -0,0 +1,560 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## FineTuning LLM with Model-As-Service\n", + "\n", + "This sample shows how to create a standalone FineTuning job to fine tune a model for the chat-completion task using the ultrachat_200k dataset.\n", + "\n", + "#### Training data\n", + "We use the [ultrachat_200k](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k) dataset. 
The dataset has four splits, suitable for:\n", + "* Supervised fine-tuning (sft).\n", + "* Generation ranking (gen).\n", + "\n", + "#### Model\n", + "We will use the Phi-3-mini-4k-instruct model to show how a user can finetune a model for the chat-completion task. If you opened this notebook from a specific model card, remember to replace the specific model name. \n", + "\n", + "#### Outline\n", + "1. Setup pre-requisites\n", + "2. Pick a model to fine-tune.\n", + "3. Create training and validation datasets.\n", + "4. Configure the fine tuning job.\n", + "5. Submit the fine tuning job.\n", + "6. Create serverless deployment using finetuned model and sample inference" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Setup pre-requisites\n", + "* Install dependencies\n", + "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace the empty `subscription_id`, `resource_group_name` and `workspace_name` values below with your own.\n", + "* Connect to `azureml` system registry\n", + "* Set an optional experiment name" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Install dependencies by running below cell. 
This is not an optional step if running in a new environment.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install azure-ai-ml\n", + "%pip install azure-identity\n", + "\n", + "%pip install mlflow\n", + "%pip install azureml-mlflow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create AzureML Workspace connections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml import MLClient\n", + "from azure.identity import (\n", + " DefaultAzureCredential,\n", + " InteractiveBrowserCredential,\n", + ")\n", + "\n", + "try:\n", + " credential = DefaultAzureCredential()\n", + " credential.get_token(\"https://login.windows.net/\")\n", + "except Exception as ex:\n", + " credential = InteractiveBrowserCredential(tenant_id=\"\")\n", + "\n", + "try:\n", + " workspace_ml_client = MLClient.from_config(credential=credential)\n", + "except:\n", + " workspace_ml_client = MLClient(\n", + " credential,\n", + " subscription_id=\"\",\n", + " resource_group_name=\"\",\n", + " workspace_name=\"\",\n", + " )\n", + "\n", + "# the models, fine tuning pipelines and environments are available in various AzureML system registries,\n", + "# Example: Phi family of models are in \"azureml\", Llama family of models are in \"azureml-meta\" registry.\n", + "registry_ml_client = MLClient(credential, registry_name=\"azureml-meta\")\n", + "\n", + "# Get AzureML workspace object.\n", + "workspace = workspace_ml_client._workspaces.get(workspace_ml_client.workspace_name)\n", + "workspace._workspace_id" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Pick a foundation model to fine tune\n", + "\n", + "`Phi-3-mini-4k-instruct` is a 3.8B parameters, lightweight, state-of-the-art open model built upon datasets used for Phi-2. 
The model belongs to the Phi-3 model family, and the Mini version comes in two variants, 4K and 128K, which refer to the context length (in tokens) each can support. You can browse these models in the Model Catalog in the Azure AI Studio, filtering by the `chat-completion` task. In this example, we use the `Phi-3-mini-4k-instruct` model. If you have opened this notebook for a different model, replace the model name and version accordingly.\n", + "\n", + "Note the model id property of the model. This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in the model details page in Azure AI Studio Model Catalog." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_name = \"Phi-3-mini-4k-instruct\" # \"Meta-Llama-3.1-8B-Instruct\"\n", + "foundation_model = registry_ml_client.models.get(model_name, label=\"latest\")\n", + "print(\n", + " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n", + " foundation_model.name, foundation_model.version, foundation_model.id\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.constants._common import AssetTypes\n", + "from azure.ai.ml.entities._inputs_outputs import Input\n", + "\n", + "model_to_finetune = Input(type=AssetTypes.MLFLOW_MODEL, path=foundation_model.id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Prepare data\n", + "\n", + "- The [download-dataset.py](./download-dataset.py) script is used to download the ultrachat_200k dataset and transform the dataset into the format expected by the model. Because the dataset is large, only part of it is used here.\n", + "- Running the below script downloads only 1% of the data because the dataset is very large. 
This can be increased by changing `dataset_split_pc` parameter to desired percentage.\n", + "\n", + "**Note** : Some language models have different language codes and hence the column names in the dataset should reflect the same.\n", + "\n", + "The chat-completion dataset is stored in parquet format with each entry using the following schema:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " {\n", + " \"prompt\": \"Create a fully-developed protagonist who is challenged to survive within a dystopian society under the rule of a tyrant. ...\",\n", + " \"messages\":[\",\n", + " {\",\n", + " \"content\": \"Create a fully-developed protagonist who is challenged to survive within a dystopian society under the rule of a tyrant. ...\",\n", + " \"role\": \"user\",\n", + " },\n", + " {\",\n", + " \"content\": \"Name: Ava\\n Ava was just 16 years old when the world as she knew it came crashing down. The government had collapsed, leaving behind a chaotic and lawless society. ...\",\n", + " \"role\": \"assistant\",\n", + " },\n", + " {\",\n", + " \"content\": \"Wow, Ava's story is so intense and inspiring! Can you provide me with more details. ...\",\n", + " \"role\": \"user\",\n", + " },\n", + " {\n", + " \"content\": \"Certainly! ....\",\n", + " \"role\": \"assistant\"\",\n", + " }\n", + " ],\n", + " \"prompt_id\": \"d938b65dfe31f05f80eb8572964c6673eddbd68eff3db6bd234d7f1e3b86c2af\",\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install dependencies for downloading datasets from huggingface\n", + "\n", + "%pip install datasets --upgrade\n", + "%pip install py7zr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# download the dataset using the helper script. 
This needs datasets library: https://pypi.org/project/datasets/\n", + "# For demo purposes, we are downloading only 1% of the dataset and creating train and validation splits.\n", + "import os\n", + "import shutil\n", + "\n", + "dataset_dir = \"ultrachat_200k_dataset\"\n", + "shutil.rmtree(dataset_dir, ignore_errors=True)\n", + "exit_status = os.system(\n", + " f\"python ./download-dataset.py --dataset HuggingFaceH4/ultrachat_200k --download_dir {dataset_dir} --dataset_split_pc 1\"\n", + ")\n", + "if exit_status != 0:\n", + " raise Exception(\"Error downloading dataset\")\n", + "\n", + "os.rename(f\"./{dataset_dir}/train_sft.jsonl\", f\"./{dataset_dir}/train.jsonl\")\n", + "os.rename(f\"./{dataset_dir}/test_sft.jsonl\", f\"./{dataset_dir}/validation.jsonl\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create data inputs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import Data\n", + "\n", + "dataset_version = \"1\"\n", + "train_dataset_name = f\"{dataset_dir}_train\"\n", + "try:\n", + " train_data_asset = workspace_ml_client.data.get(\n", + " train_dataset_name, version=dataset_version\n", + " )\n", + " print(f\"Dataset {train_dataset_name} already exists\")\n", + "except:\n", + " print(\"creating dataset\")\n", + " train_data = Data(\n", + " path=f\"./{dataset_dir}/train.jsonl\",\n", + " type=AssetTypes.URI_FILE,\n", + " description=\"Training dataset\",\n", + " name=train_dataset_name,\n", + " version=\"1\",\n", + " )\n", + " train_data_asset = workspace_ml_client.data.create_or_update(train_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import Data\n", + "\n", + "dataset_version = \"1\"\n", + "validation_dataset_name = f\"{dataset_dir}_validation\"\n", + "try:\n", + " validation_data_asset = workspace_ml_client.data.get(\n", + " 
validation_dataset_name, version=dataset_version\n", + " )\n", + " print(f\"Dataset {validation_dataset_name} already exists\")\n", + "except:\n", + " print(\"creating dataset\")\n", + " validation_data = Data(\n", + " path=f\"./{dataset_dir}/validation.jsonl\",\n", + " type=AssetTypes.URI_FILE,\n", + " description=\"Validation dataset\",\n", + " name=validation_dataset_name,\n", + " version=\"1\",\n", + " )\n", + " validation_data_asset = workspace_ml_client.data.create_or_update(validation_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities._inputs_outputs import Input\n", + "\n", + "training_data = Input(\n", + " type=train_data_asset.type,\n", + " path=f\"azureml://locations/{workspace.location}/workspaces/{workspace._workspace_id}/data/{train_data_asset.name}/versions/{train_data_asset.version}\",\n", + ")\n", + "validation_data = Input(\n", + " type=validation_data_asset.type,\n", + " path=f\"azureml://locations/{workspace.location}/workspaces/{workspace._workspace_id}/data/{validation_data_asset.name}/versions/{validation_data_asset.version}\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Submit the fine tuning job using the model and data as inputs\n", + " \n", + "Create FineTuning job using all the data that we have so far." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Create marketplace subscription for 3P models\n", + "Note: Skip this step for 1P (Microsoft) models that are offered on Azure. 
Example: Phi family of models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_id_to_subscribe = \"/\".join(foundation_model.id.split(\"/\")[:-2])\n", + "print(model_id_to_subscribe)\n", + "\n", + "normalized_model_name = model_name.replace(\".\", \"-\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import MarketplaceSubscription\n", + "\n", + "\n", + "subscription_name = f\"{normalized_model_name}-sub\"\n", + "\n", + "marketplace_subscription = MarketplaceSubscription(\n", + " model_id=model_id_to_subscribe,\n", + " name=subscription_name,\n", + ")\n", + "\n", + "# note: this will throw exception if the subscription already exists or subscription is not required (for example, if the model is not in the marketplace like Phi family)\n", + "try:\n", + " marketplace_subscription = (\n", + " workspace_ml_client.marketplace_subscriptions.begin_create_or_update(\n", + " marketplace_subscription\n", + " ).result()\n", + " )\n", + "except Exception as ex:\n", + " print(ex)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Define finetune parameters\n", + "\n", + "##### The following parameters are required:\n", + "\n", + "1. `model` - Base model to finetune.\n", + "2. `training_data` - Training data for finetuning the base model.\n", + "3. `validation_data` - Validation data for finetuning the base model.\n", + "4. `task` - FineTuning task to perform, e.g. CHAT_COMPLETION for chat-completion finetuning jobs.\n", + "5. `outputs`- Output registered model name.\n", + "\n", + "##### Following parameters are optional:\n", + "\n", + "1. `hyperparameters` - Parameters that control the FineTuning behavior at runtime.\n", + "2. `name`- FineTuning job name\n", + "3. `experiment_name` - Experiment name for FineTuning job.\n", + "4. 
`display_name` - FineTuning job display name." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities._job.finetuning.custom_model_finetuning_job import (\n", + " CustomModelFineTuningJob,\n", + ")\n", + "import uuid\n", + "from azure.ai.ml._restclient.v2024_01_01_preview.models import (\n", + " FineTuningTaskType,\n", + ")\n", + "from azure.ai.ml.entities._inputs_outputs import Output\n", + "\n", + "guid = uuid.uuid4()\n", + "short_guid = str(guid)[:8]\n", + "\n", + "finetuning_job = CustomModelFineTuningJob(\n", + " task=FineTuningTaskType.CHAT_COMPLETION,\n", + " training_data=training_data,\n", + " validation_data=validation_data,\n", + " hyperparameters={\n", + " \"per_device_train_batch_size\": \"1\",\n", + " \"learning_rate\": \"0.00002\",\n", + " \"num_train_epochs\": \"1\",\n", + " },\n", + " model=model_to_finetune,\n", + " display_name=f\"ft-job-display-name-{short_guid}\",\n", + " name=f\"ft-job-{short_guid}\",\n", + " experiment_name=\"ft-job-finetuning-experiment\",\n", + " outputs={\n", + " \"registered_model\": Output(\n", + " type=\"mlflow_model\", name=f\"ft-job-finetune-registered-{short_guid}\"\n", + " )\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "created_job = workspace_ml_client.jobs.create_or_update(finetuning_job)\n", + "workspace_ml_client.jobs.get(created_job.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Wait for the above job to complete successfully" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "status = workspace_ml_client.jobs.get(created_job.name).status\n", + "\n", + "import time\n", + "\n", + "while True:\n", + " status = workspace_ml_client.jobs.get(created_job.name).status\n", + " print(f\"Current job status: {status}\")\n", + " if status in 
[\"Failed\", \"Completed\", \"Canceled\"]:\n", + " print(\"Job has finished with status: {0}\".format(status))\n", + " break\n", + " else:\n", + " print(\"Job is still running. Checking again in 30 seconds.\")\n", + " time.sleep(30)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "finetune_model_name = created_job.outputs[\"registered_model\"][\"name\"]\n", + "finetune_model_name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Deploy the model as a serverless endpoint\n", + "\n", + "endpoint_name = f\"{normalized_model_name}-ft-{short_guid}\" # Name must be unique\n", + "model_id = f\"azureml://locations/{workspace.location}/workspaces/{workspace._workspace_id}/models/{finetune_model_name}/versions/1\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4. Create serverless endpoint using the finetuned model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import ServerlessEndpoint\n", + "\n", + "serverless_endpoint = ServerlessEndpoint(name=endpoint_name, model_id=model_id)\n", + "\n", + "created_endpoint = workspace_ml_client.serverless_endpoints.begin_create_or_update(\n", + " serverless_endpoint\n", + ").result()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "endpoint = workspace_ml_client.serverless_endpoints.get(endpoint_name)\n", + "endpoint_keys = workspace_ml_client.serverless_endpoints.get_keys(endpoint_name)\n", + "auth_key = endpoint_keys.primary_key" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "\n", + "url = f\"{endpoint.scoring_uri}/v1/chat/completions\"\n", + "\n", + "payload = {\n", + " \"max_tokens\": 1024,\n", + " \"messages\": [\n", 
"""Download a Hugging Face dataset and save every split as a JSON Lines file.

Example:
    python download-dataset.py --dataset HuggingFaceH4/ultrachat_200k --download_dir ultrachat_200k_dataset --dataset_split_pc 1

Each split reported by ``get_dataset_split_names`` is written to
``<download_dir>/<split>.jsonl``.
"""
import argparse
import os


def parse_args(argv=None):
    """Parse the command line options.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace`` with ``dataset``, ``dataset_subset``,
        ``dataset_split_pc`` and ``download_dir`` attributes.
    """
    parser = argparse.ArgumentParser()
    # name of the dataset to download; the notebook next to this script
    # passes HuggingFaceH4/ultrachat_200k explicitly
    parser.add_argument(
        "--dataset",
        type=str,
        default="samsum",
        help="dataset name (pass it explicitly; the default is a legacy value)",
    )
    # optional subset (configuration) of the dataset to download
    parser.add_argument(
        "--dataset_subset", type=str, default=None, help="dataset subset name"
    )
    # optional percentage of each split to download; omitted means 100%
    parser.add_argument(
        "--dataset_split_pc",
        type=str,
        default=None,
        help="percentage of each split to be downloaded (default: whole split)",
    )
    # directory to download the dataset to
    parser.add_argument(
        "--download_dir",
        type=str,
        default="data",
        help="directory to download the dataset to",
    )
    return parser.parse_args(argv)


def build_split_spec(split, split_pc=None):
    """Return the split expression handed to ``load_dataset``.

    Args:
        split: Name of the dataset split, e.g. ``"train_sft"``.
        split_pc: Percentage of the split to load, or ``None`` for all of it.

    Returns:
        ``split`` unchanged when ``split_pc`` is ``None``; otherwise the slice
        expression ``"<split>[0%:<split_pc>%]"``.
    """
    # Without this guard an omitted --dataset_split_pc would produce the
    # invalid expression "<split>[0%:None%]".
    if split_pc is None:
        return split
    return f"{split}[0%:{split_pc}%]"


def main(argv=None):
    """Download the requested dataset and write one ``.jsonl`` file per split."""
    args = parse_args(argv)

    # create the download directory if it does not exist
    os.makedirs(args.download_dir, exist_ok=True)

    # import hugging face datasets library (after argument parsing, so that
    # --help and argument errors do not require the package)
    from datasets import get_dataset_split_names, load_dataset

    for split in get_dataset_split_names(args.dataset, args.dataset_subset):
        # load the (possibly partial) split of the dataset
        dataset = load_dataset(
            args.dataset,
            args.dataset_subset,
            split=build_split_spec(split, args.dataset_split_pc),
        )
        # save the split of the dataset to the download directory as json lines file
        dataset.to_json(os.path.join(args.download_dir, f"{split}.jsonl"))


if __name__ == "__main__":
    main()