vision batch deployment cli and sdk notebook #2433

Merged · 5 commits · Jul 10, 2023
@@ -40,9 +40,7 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):

     # We'll copy each JSONL file within its related MLTable folder
     training_mltable_path = os.path.join(dataset_parent_dir, "training-mltable-folder")
-    validation_mltable_path = os.path.join(
-        dataset_parent_dir, "validation-mltable-folder"
-    )
+    validation_mltable_path = os.path.join(dataset_parent_dir, "validation-mltable-folder")

     # Create MLTable folders, if they don't exist
     os.makedirs(training_mltable_path, exist_ok=True)
@@ -51,12 +49,8 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
     train_validation_ratio = 5

     # Path to the training and validation files
-    train_annotations_file = os.path.join(
-        training_mltable_path, "train_annotations.jsonl"
-    )
-    validation_annotations_file = os.path.join(
-        validation_mltable_path, "validation_annotations.jsonl"
-    )
+    train_annotations_file = os.path.join(training_mltable_path, "train_annotations.jsonl")
+    validation_annotations_file = os.path.join(validation_mltable_path, "validation_annotations.jsonl")

     # Baseline of json line dictionary
     json_line_sample = {"image_url": uri_folder_data_path, "label": ""}
@@ -87,20 +81,15 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
     print("done")

     # Create and save train mltable
-    train_mltable_file_contents = create_ml_table_file(
-        os.path.basename(train_annotations_file)
-    )
+    train_mltable_file_contents = create_ml_table_file(os.path.basename(train_annotations_file))
     save_ml_table_file(training_mltable_path, train_mltable_file_contents)

     # Create and save validation mltable
-    validation_mltable_file_contents = create_ml_table_file(
-        os.path.basename(validation_annotations_file)
-    )
+    validation_mltable_file_contents = create_ml_table_file(os.path.basename(validation_annotations_file))
     save_ml_table_file(validation_mltable_path, validation_mltable_file_contents)


 def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
-
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)

@@ -142,9 +131,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
     print("")
     print("Path to folder in Blob Storage:")
     print(uri_folder_data_asset.path)
-    create_jsonl_and_mltable_files(
-        uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir
-    )
+    create_jsonl_and_mltable_files(uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir)


 def read_image(image_path):
@@ -153,16 +140,12 @@ def read_image(image_path):


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Prepare data for image classification"
-    )
+    parser = argparse.ArgumentParser(description="Prepare data for image classification")

     parser.add_argument("--subscription", type=str, help="Subscription ID")
     parser.add_argument("--resource_group", type=str, help="Resource group name")
     parser.add_argument("--workspace", type=str, help="Workspace name")
-    parser.add_argument(
-        "--data_path", type=str, default="./data", help="Dataset location"
-    )
+    parser.add_argument("--data_path", type=str, default="./data", help="Dataset location")

     args, unknown = parser.parse_known_args()
     args_dict = vars(args)
@@ -178,9 +161,7 @@ def read_image(image_path):
     workspace = args.workspace
     ml_client = MLClient(credential, subscription_id, resource_group, workspace)

-    upload_data_and_create_jsonl_mltable_files(
-        ml_client=ml_client, dataset_parent_dir=args.data_path
-    )
+    upload_data_and_create_jsonl_mltable_files(ml_client=ml_client, dataset_parent_dir=args.data_path)

     sample_image = os.path.join(args.data_path, "fridgeObjects", "milk_bottle", "99.jpg")
     huggingface_request_json = {
…
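The helpers create_ml_table_file and save_ml_table_file are defined elsewhere in this script, so their bodies never appear in the diff. For orientation, here is a minimal Python sketch of what they plausibly look like, assuming the standard MLTable read_json_lines transformation; the bodies below are illustrative, not the PR's code:

import os


def create_ml_table_file(filename):
    # Minimal MLTable spec pointing at a JSONL annotations file (assumed shape).
    return (
        "paths:\n"
        f"  - file: ./{filename}\n"
        "transformations:\n"
        "  - read_json_lines:\n"
        "      encoding: utf8\n"
        "      invalid_lines: error\n"
        "      include_path_column: false\n"
    )


def save_ml_table_file(output_path, mltable_file_contents):
    # An MLTable folder is any folder containing a file literally named "MLTable".
    with open(os.path.join(output_path, "MLTable"), "w") as f:
        f.write(mltable_file_contents)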
---
@@ -40,9 +40,7 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):

     # We'll copy each JSONL file within its related MLTable folder
     training_mltable_path = os.path.join(dataset_parent_dir, "training-mltable-folder")
-    validation_mltable_path = os.path.join(
-        dataset_parent_dir, "validation-mltable-folder"
-    )
+    validation_mltable_path = os.path.join(dataset_parent_dir, "validation-mltable-folder")

     # Create MLTable folders, if they don't exist
     os.makedirs(training_mltable_path, exist_ok=True)
@@ -51,12 +49,8 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
     train_validation_ratio = 5

     # Path to the training and validation files
-    train_annotations_file = os.path.join(
-        training_mltable_path, "train_annotations.jsonl"
-    )
-    validation_annotations_file = os.path.join(
-        validation_mltable_path, "validation_annotations.jsonl"
-    )
+    train_annotations_file = os.path.join(training_mltable_path, "train_annotations.jsonl")
+    validation_annotations_file = os.path.join(validation_mltable_path, "validation_annotations.jsonl")

     # Path to the labels file.
     label_file = os.path.join(dataset_dir, "labels.csv")
@@ -90,26 +84,23 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
     print("done")

     # Create and save train mltable
-    train_mltable_file_contents = create_ml_table_file(
-        os.path.basename(train_annotations_file)
-    )
+    train_mltable_file_contents = create_ml_table_file(os.path.basename(train_annotations_file))
     save_ml_table_file(training_mltable_path, train_mltable_file_contents)

     # Create and save validation mltable
-    validation_mltable_file_contents = create_ml_table_file(
-        os.path.basename(validation_annotations_file)
-    )
+    validation_mltable_file_contents = create_ml_table_file(os.path.basename(validation_annotations_file))
     save_ml_table_file(validation_mltable_path, validation_mltable_file_contents)


 def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
-
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)

     # download data
     print("Downloading data.")
-    download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip"
+    download_url = (
+        "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip"
+    )

     # Extract current dataset name from dataset url
     dataset_name = os.path.basename(download_url).split(".")[0]
@@ -145,9 +136,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
     print("")
     print("Path to folder in Blob Storage:")
     print(uri_folder_data_asset.path)
-    create_jsonl_and_mltable_files(
-        uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir
-    )
+    create_jsonl_and_mltable_files(uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir)


 def read_image(image_path):
@@ -156,16 +145,12 @@ def read_image(image_path):


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Prepare data for image classification"
-    )
+    parser = argparse.ArgumentParser(description="Prepare data for image classification")

     parser.add_argument("--subscription", type=str, help="Subscription ID")
     parser.add_argument("--group", type=str, help="Resource group name")
     parser.add_argument("--workspace", type=str, help="Workspace name")
-    parser.add_argument(
-        "--data_path", type=str, default="./data", help="Dataset location"
-    )
+    parser.add_argument("--data_path", type=str, default="./data", help="Dataset location")

     args, unknown = parser.parse_known_args()
     args_dict = vars(args)
@@ -181,9 +166,7 @@ def read_image(image_path):
     workspace = args.workspace
     ml_client = MLClient(credential, subscription_id, resource_group, workspace)

-    upload_data_and_create_jsonl_mltable_files(
-        ml_client=ml_client, dataset_parent_dir=args.data_path
-    )
+    upload_data_and_create_jsonl_mltable_files(ml_client=ml_client, dataset_parent_dir=args.data_path)

     sample_image = os.path.join(args.data_path, "multilabelFridgeObjects", "images", "56.jpg")
     huggingface_request_json = {
…
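Both prepare_data.py variants build their annotation files from json_line_sample, and train_validation_ratio = 5 sends every fifth image to validation, i.e. roughly an 80/20 split. A hypothetical sketch of the loop the diff elides; image_dir and label are placeholders for values the real script derives from the downloaded dataset:

import json
import os

with open(train_annotations_file, "w") as train_f, open(
    validation_annotations_file, "w"
) as validation_f:
    for i, image_file in enumerate(sorted(os.listdir(image_dir))):  # image_dir: placeholder
        json_line = dict(json_line_sample)
        json_line["image_url"] += f"images/{image_file}"
        json_line["label"] = label  # placeholder, e.g. the class folder name for multiclass
        row = json.dumps(json_line) + "\n"
        if i % train_validation_ratio == 0:
            validation_f.write(row)  # every 5th image goes to validation
        else:
            train_f.write(row)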
---
@@ -0,0 +1,9 @@
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
name: demo
description: "Batch endpoint for image-classification task"
type: model
resources:
  instance_count: 1
settings:
  mini_batch_size: 1

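Since this PR covers both the CLI and SDK notebooks, the YAML above has a direct azure-ai-ml counterpart. A hedged sketch of the SDK path; the workspace coordinates and endpoint name are placeholders, and the model URI mirrors the registry reference used by the CLI script below:

from azure.ai.ml import MLClient
from azure.ai.ml.entities import BatchDeployment
from azure.identity import DefaultAzureCredential

# Placeholders: fill in your own workspace coordinates.
ml_client = MLClient(
    DefaultAzureCredential(), "<SUBSCRIPTION_ID>", "<RESOURCE_GROUP>", "<WORKSPACE_NAME>"
)

deployment = BatchDeployment(
    name="demo",
    description="Batch endpoint for image-classification task",
    endpoint_name="<ENDPOINT_NAME>",  # placeholder: an existing batch endpoint
    model="azureml://registries/azureml-preview/models/microsoft-beit-base-patch16-224-pt22k-ft22k/labels/latest",
    compute="cpu-cluster",
    instance_count=1,
    mini_batch_size=1,
)
ml_client.batch_deployments.begin_create_or_update(deployment).result()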
---
@@ -2,5 +2,11 @@ $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
 name: demo
 instance_type: Standard_DS3_v2
 instance_count: 1
+liveness_probe:
+  initial_delay: 180
+  period: 180
+  failure_threshold: 49
+  timeout: 299
 request_settings:
   request_timeout_ms: 60000

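The generous liveness probe (up to 49 failures at 180-second intervals) gives a large vision model time to download and load before the platform restarts the container, and the 60-second request timeout accommodates image payloads. For the SDK notebook, the same knobs map onto ProbeSettings and OnlineRequestSettings; a sketch, with endpoint and model references left as placeholders:

from azure.ai.ml.entities import (
    ManagedOnlineDeployment,
    OnlineRequestSettings,
    ProbeSettings,
)

deployment = ManagedOnlineDeployment(
    name="demo",
    endpoint_name="<ENDPOINT_NAME>",  # placeholder
    model="<MODEL_ASSET_ID>",  # placeholder
    instance_type="Standard_DS3_v2",
    instance_count=1,
    liveness_probe=ProbeSettings(
        initial_delay=180, period=180, failure_threshold=49, timeout=299
    ),
    request_settings=OnlineRequestSettings(request_timeout_ms=60000),
)
# ml_client.online_deployments.begin_create_or_update(deployment).result()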
---
@@ -0,0 +1,145 @@


set -x
# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-image-classification
# the sample scoring file available in the same folder as the above notebook

# script inputs
registry_name="azureml-preview"
subscription_id="<SUBSCRIPTION_ID>"
resource_group_name="<RESOURCE_GROUP>"
workspace_name="<WORKSPACE_NAME>"

# This is the model from system registry that needs to be deployed
model_name="microsoft-beit-base-patch16-224-pt22k-ft22k"

model_label="latest"

deployment_compute="cpu-cluster"
# todo: fetch deployment_sku from the min_inference_sku tag of the model
deployment_sku="Standard_DS3_v2"


version=$(date +%s)
endpoint_name="image-classification-$version"
deployment_name="demo-$version"

# Prepare data for deployment
multi_label=0
data_path="data_batch"
python ./prepare_data.py --is_multilabel $multi_label --mode "batch" --data_path $data_path
# sample request data in csv format with image column
if [ $multi_label -eq 1 ]
then
    sample_request_csv="./data_batch/ml_image_list.csv"
    sample_request_folder="./data_batch/multilabelFridgeObjects"
else
    sample_request_csv="./data_batch/mc_image_list.csv"
    sample_request_folder="./data_batch/fridgeObjects"
fi

# 1. Setup pre-requisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
["$resource_group_name" = "<RESOURCE_GROUP>" ] || \
[ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
exit 1
fi

az account set -s $subscription_id
workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"

# 2. Check if the model exists in the registry
# need to confirm model show command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name
then
    echo "Model $model_name:$model_label does not exist in registry $registry_name"
    exit 1
fi

# get the latest model version
model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv)

# 3. check if compute $deployment_compute exists, else create it
if az ml compute show --name $deployment_compute $workspace_info
then
    echo "Compute cluster $deployment_compute already exists"
else
    echo "Creating compute cluster $deployment_compute"
    az ml compute create --name $deployment_compute --type amlcompute --min-instances 0 --max-instances 2 --size $deployment_sku $workspace_info || {
        echo "Failed to create compute cluster $deployment_compute"
        exit 1
    }
fi

# 4. Deploy the model to an endpoint
# create batch endpoint
az ml batch-endpoint create --name $endpoint_name $workspace_info || {
    echo "endpoint create failed"; exit 1;
}

# deploy model from registry to endpoint in workspace
az ml batch-deployment create --file ./deploy-batch.yaml $workspace_info --set \
    endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
    compute=$deployment_compute \
    name=$deployment_name || {
    echo "deployment create failed"; exit 1;
}

# 5.1 Try a scoring request with image folder

# Check if scoring folder exists
if [ -d $data_path ]; then
    echo -e "Invoking endpoint $endpoint_name with the following input:\n\n"
    ls $data_path
    echo -e "\n\n"
else
    echo "Scoring folder $data_path does not exist"
    exit 1
fi

# invoke the endpoint
folder_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \
    --deployment-name $deployment_name --input $sample_request_folder --input-type \
    uri_folder $workspace_info --query name --output tsv) || {
    echo "endpoint invoke failed"; exit 1;
}

# wait for the job to complete
az ml job stream --name $folder_inference_job $workspace_info || {
    echo "job stream failed"; exit 1;
}

# 5.2 Try a scoring request with csv file
# Note: if the job fails with "Assertion Error (The actual length exceeded max length 100 MB)",
# retry with fewer input images or use the ImageFolder input mode.

# Check if scoring data file exists
if [ -f $sample_request_csv ]; then
    echo -e "Invoking endpoint $endpoint_name with the following input:\n\n"
    echo -e "\n\n"
else
    echo "Scoring file $sample_request_csv does not exist"
    exit 1
fi

# invoke the endpoint
csv_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \
    --deployment-name $deployment_name --input $sample_request_csv --input-type \
    uri_file $workspace_info --query name --output tsv) || {
    echo "endpoint invoke failed"; exit 1;
}

# wait for the job to complete
az ml job stream --name $csv_inference_job $workspace_info || {
    echo "job stream failed"; exit 1;
}

# 6. Delete the endpoint
# Batch endpoints use compute resources only when jobs are submitted. You can keep the
# batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint.
# If you created your compute cluster to have zero minimum instances and scale down soon after being idle,
# you won't be charged for unused compute.
az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || {
    echo "endpoint delete failed"; exit 1;
}
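The SDK notebook linked at the top of the script drives the same invoke-and-stream flow from Python. A condensed sketch of the folder-mode request in step 5.1, reusing the paths the script prepares; the angle-bracket values are the same placeholders the script asks you to fill in:

from azure.ai.ml import Input, MLClient
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    DefaultAzureCredential(), "<SUBSCRIPTION_ID>", "<RESOURCE_GROUP>", "<WORKSPACE_NAME>"
)

# Folder-mode scoring request, mirroring `az ml batch-endpoint invoke ... --input-type uri_folder`.
job = ml_client.batch_endpoints.invoke(
    endpoint_name="<ENDPOINT_NAME>",  # e.g. image-classification-<timestamp>
    deployment_name="<DEPLOYMENT_NAME>",  # e.g. demo-<timestamp>
    input=Input(type="uri_folder", path="./data_batch/fridgeObjects"),
)

# Equivalent of `az ml job stream`.
ml_client.jobs.stream(job.name)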