vision batch deployment cli and sdk notebook #2433

Merged · 5 commits · Jul 10, 2023
@@ -40,9 +40,7 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):

     # We'll copy each JSONL file within its related MLTable folder
     training_mltable_path = os.path.join(dataset_parent_dir, "training-mltable-folder")
-    validation_mltable_path = os.path.join(
-        dataset_parent_dir, "validation-mltable-folder"
-    )
+    validation_mltable_path = os.path.join(dataset_parent_dir, "validation-mltable-folder")

     # Create MLTable folders, if they don't exist
     os.makedirs(training_mltable_path, exist_ok=True)
@@ -51,12 +49,8 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
     train_validation_ratio = 5

     # Path to the training and validation files
-    train_annotations_file = os.path.join(
-        training_mltable_path, "train_annotations.jsonl"
-    )
-    validation_annotations_file = os.path.join(
-        validation_mltable_path, "validation_annotations.jsonl"
-    )
+    train_annotations_file = os.path.join(training_mltable_path, "train_annotations.jsonl")
+    validation_annotations_file = os.path.join(validation_mltable_path, "validation_annotations.jsonl")

     # Baseline of json line dictionary
     json_line_sample = {"image_url": uri_folder_data_path, "label": ""}
@@ -87,20 +81,15 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
     print("done")

     # Create and save train mltable
-    train_mltable_file_contents = create_ml_table_file(
-        os.path.basename(train_annotations_file)
-    )
+    train_mltable_file_contents = create_ml_table_file(os.path.basename(train_annotations_file))
     save_ml_table_file(training_mltable_path, train_mltable_file_contents)

     # Create and save validation mltable
-    validation_mltable_file_contents = create_ml_table_file(
-        os.path.basename(validation_annotations_file)
-    )
+    validation_mltable_file_contents = create_ml_table_file(os.path.basename(validation_annotations_file))
     save_ml_table_file(validation_mltable_path, validation_mltable_file_contents)


 def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
-
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)

@@ -142,9 +131,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
     print("")
     print("Path to folder in Blob Storage:")
     print(uri_folder_data_asset.path)
-    create_jsonl_and_mltable_files(
-        uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir
-    )
+    create_jsonl_and_mltable_files(uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir)


 def read_image(image_path):
@@ -153,16 +140,12 @@ def read_image(image_path):


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Prepare data for image classification"
-    )
+    parser = argparse.ArgumentParser(description="Prepare data for image classification")

     parser.add_argument("--subscription", type=str, help="Subscription ID")
     parser.add_argument("--resource_group", type=str, help="Resource group name")
     parser.add_argument("--workspace", type=str, help="Workspace name")
-    parser.add_argument(
-        "--data_path", type=str, default="./data", help="Dataset location"
-    )
+    parser.add_argument("--data_path", type=str, default="./data", help="Dataset location")

     args, unknown = parser.parse_known_args()
     args_dict = vars(args)
@@ -178,9 +161,7 @@ def read_image(image_path):
     workspace = args.workspace
     ml_client = MLClient(credential, subscription_id, resource_group, workspace)

-    upload_data_and_create_jsonl_mltable_files(
-        ml_client=ml_client, dataset_parent_dir=args.data_path
-    )
+    upload_data_and_create_jsonl_mltable_files(ml_client=ml_client, dataset_parent_dir=args.data_path)

     sample_image = os.path.join(args.data_path, "fridgeObjects", "milk_bottle", "99.jpg")
     huggingface_request_json = {
…
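The helpers create_ml_table_file and save_ml_table_file are defined elsewhere in this script, so their bodies never appear in the diff. For orientation, here is a minimal Python sketch of what they plausibly look like, assuming the standard MLTable read_json_lines transformation; the bodies below are illustrative, not the PR's code:

import os


def create_ml_table_file(filename):
    # Minimal MLTable spec pointing at a JSONL annotations file (assumed shape).
    return (
        "paths:\n"
        f"  - file: ./{filename}\n"
        "transformations:\n"
        "  - read_json_lines:\n"
        "      encoding: utf8\n"
        "      invalid_lines: error\n"
        "      include_path_column: false\n"
    )


def save_ml_table_file(output_path, mltable_file_contents):
    # An MLTable folder is any folder containing a file literally named "MLTable".
    with open(os.path.join(output_path, "MLTable"), "w") as f:
        f.write(mltable_file_contents)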
---
@@ -40,9 +40,7 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):

     # We'll copy each JSONL file within its related MLTable folder
     training_mltable_path = os.path.join(dataset_parent_dir, "training-mltable-folder")
-    validation_mltable_path = os.path.join(
-        dataset_parent_dir, "validation-mltable-folder"
-    )
+    validation_mltable_path = os.path.join(dataset_parent_dir, "validation-mltable-folder")

     # Create MLTable folders, if they don't exist
     os.makedirs(training_mltable_path, exist_ok=True)
@@ -51,12 +49,8 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
     train_validation_ratio = 5

     # Path to the training and validation files
-    train_annotations_file = os.path.join(
-        training_mltable_path, "train_annotations.jsonl"
-    )
-    validation_annotations_file = os.path.join(
-        validation_mltable_path, "validation_annotations.jsonl"
-    )
+    train_annotations_file = os.path.join(training_mltable_path, "train_annotations.jsonl")
+    validation_annotations_file = os.path.join(validation_mltable_path, "validation_annotations.jsonl")

     # Path to the labels file.
     label_file = os.path.join(dataset_dir, "labels.csv")
@@ -90,26 +84,23 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
     print("done")

     # Create and save train mltable
-    train_mltable_file_contents = create_ml_table_file(
-        os.path.basename(train_annotations_file)
-    )
+    train_mltable_file_contents = create_ml_table_file(os.path.basename(train_annotations_file))
     save_ml_table_file(training_mltable_path, train_mltable_file_contents)

     # Create and save validation mltable
-    validation_mltable_file_contents = create_ml_table_file(
-        os.path.basename(validation_annotations_file)
-    )
+    validation_mltable_file_contents = create_ml_table_file(os.path.basename(validation_annotations_file))
     save_ml_table_file(validation_mltable_path, validation_mltable_file_contents)


 def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
-
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)

     # download data
     print("Downloading data.")
-    download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip"
+    download_url = (
+        "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip"
+    )

     # Extract current dataset name from dataset url
     dataset_name = os.path.basename(download_url).split(".")[0]
@@ -145,9 +136,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
     print("")
     print("Path to folder in Blob Storage:")
     print(uri_folder_data_asset.path)
-    create_jsonl_and_mltable_files(
-        uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir
-    )
+    create_jsonl_and_mltable_files(uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir)


 def read_image(image_path):
@@ -156,16 +145,12 @@ def read_image(image_path):


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Prepare data for image classification"
-    )
+    parser = argparse.ArgumentParser(description="Prepare data for image classification")

     parser.add_argument("--subscription", type=str, help="Subscription ID")
     parser.add_argument("--group", type=str, help="Resource group name")
     parser.add_argument("--workspace", type=str, help="Workspace name")
-    parser.add_argument(
-        "--data_path", type=str, default="./data", help="Dataset location"
-    )
+    parser.add_argument("--data_path", type=str, default="./data", help="Dataset location")

     args, unknown = parser.parse_known_args()
     args_dict = vars(args)
@@ -181,9 +166,7 @@ def read_image(image_path):
     workspace = args.workspace
     ml_client = MLClient(credential, subscription_id, resource_group, workspace)

-    upload_data_and_create_jsonl_mltable_files(
-        ml_client=ml_client, dataset_parent_dir=args.data_path
-    )
+    upload_data_and_create_jsonl_mltable_files(ml_client=ml_client, dataset_parent_dir=args.data_path)

     sample_image = os.path.join(args.data_path, "multilabelFridgeObjects", "images", "56.jpg")
     huggingface_request_json = {
…
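Both prepare_data.py variants build their annotation files from json_line_sample, and train_validation_ratio = 5 sends every fifth image to validation, i.e. roughly an 80/20 split. A hypothetical sketch of the loop the diff elides; image_dir and label are placeholders for values the real script derives from the downloaded dataset:

import json
import os

with open(train_annotations_file, "w") as train_f, open(
    validation_annotations_file, "w"
) as validation_f:
    for i, image_file in enumerate(sorted(os.listdir(image_dir))):  # image_dir: placeholder
        json_line = dict(json_line_sample)
        json_line["image_url"] += f"images/{image_file}"
        json_line["label"] = label  # placeholder, e.g. the class folder name for multiclass
        row = json.dumps(json_line) + "\n"
        if i % train_validation_ratio == 0:
            validation_f.write(row)  # every 5th image goes to validation
        else:
            train_f.write(row)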
---
@@ -0,0 +1,9 @@
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
name: demo
description: "Batch endpoint for image-classification task"
type: model
resources:
  instance_count: 1
settings:
  mini_batch_size: 1

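Since this PR covers both the CLI and SDK notebooks, the YAML above has a direct azure-ai-ml counterpart. A hedged sketch of the SDK path; the workspace coordinates and endpoint name are placeholders, and the model URI mirrors the registry reference used by the CLI script below:

from azure.ai.ml import MLClient
from azure.ai.ml.entities import BatchDeployment
from azure.identity import DefaultAzureCredential

# Placeholders: fill in your own workspace coordinates.
ml_client = MLClient(
    DefaultAzureCredential(), "<SUBSCRIPTION_ID>", "<RESOURCE_GROUP>", "<WORKSPACE_NAME>"
)

deployment = BatchDeployment(
    name="demo",
    description="Batch endpoint for image-classification task",
    endpoint_name="<ENDPOINT_NAME>",  # placeholder: an existing batch endpoint
    model="azureml://registries/azureml-preview/models/microsoft-beit-base-patch16-224-pt22k-ft22k/labels/latest",
    compute="cpu-cluster",
    instance_count=1,
    mini_batch_size=1,
)
ml_client.batch_deployments.begin_create_or_update(deployment).result()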
---
@@ -2,5 +2,11 @@ $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
 name: demo
 instance_type: Standard_DS3_v2
 instance_count: 1
+liveness_probe:
+  initial_delay: 180
+  period: 180
+  failure_threshold: 49
+  timeout: 299
 request_settings:
   request_timeout_ms: 60000

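The generous liveness probe (up to 49 failures at 180-second intervals) gives a large vision model time to download and load before the platform restarts the container, and the 60-second request timeout accommodates image payloads. For the SDK notebook, the same knobs map onto ProbeSettings and OnlineRequestSettings; a sketch, with endpoint and model references left as placeholders:

from azure.ai.ml.entities import (
    ManagedOnlineDeployment,
    OnlineRequestSettings,
    ProbeSettings,
)

deployment = ManagedOnlineDeployment(
    name="demo",
    endpoint_name="<ENDPOINT_NAME>",  # placeholder
    model="<MODEL_ASSET_ID>",  # placeholder
    instance_type="Standard_DS3_v2",
    instance_count=1,
    liveness_probe=ProbeSettings(
        initial_delay=180, period=180, failure_threshold=49, timeout=299
    ),
    request_settings=OnlineRequestSettings(request_timeout_ms=60000),
)
# ml_client.online_deployments.begin_create_or_update(deployment).result()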
---
@@ -0,0 +1,145 @@


set -x
# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-image-classification
# the sample scoring file available in the same folder as the above notebook

# script inputs
registry_name="azureml-preview"
subscription_id="<SUBSCRIPTION_ID>"
resource_group_name="<RESOURCE_GROUP>"
workspace_name="<WORKSPACE_NAME>"

# This is the model from system registry that needs to be deployed
model_name="microsoft-beit-base-patch16-224-pt22k-ft22k"

model_label="latest"

deployment_compute="cpu-cluster"
# todo: fetch deployment_sku from the min_inference_sku tag of the model
deployment_sku="Standard_DS3_v2"


version=$(date +%s)
endpoint_name="image-classification-$version"
deployment_name="demo-$version"

# Prepare data for deployment
multi_label=0
data_path="data_batch"
python ./prepare_data.py --is_multilabel $multi_label --mode "batch" --data_path $data_path
# sample request data in csv format with image column
if [ $multi_label -eq 1 ]
then
    sample_request_csv="./data_batch/ml_image_list.csv"
    sample_request_folder="./data_batch/multilabelFridgeObjects"
else
    sample_request_csv="./data_batch/mc_image_list.csv"
    sample_request_folder="./data_batch/fridgeObjects"
fi

# 1. Setup pre-requisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
["$resource_group_name" = "<RESOURCE_GROUP>" ] || \
[ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
exit 1
fi

az account set -s $subscription_id
workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"

# 2. Check if the model exists in the registry
# need to confirm model show command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name
then
    echo "Model $model_name:$model_label does not exist in registry $registry_name"
    exit 1
fi

# get the latest model version
model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv)

# 3. check if compute $deployment_compute exists, else create it
if az ml compute show --name $deployment_compute $workspace_info
then
    echo "Compute cluster $deployment_compute already exists"
else
    echo "Creating compute cluster $deployment_compute"
    az ml compute create --name $deployment_compute --type amlcompute --min-instances 0 --max-instances 2 --size $deployment_sku $workspace_info || {
        echo "Failed to create compute cluster $deployment_compute"
        exit 1
    }
fi

# 4. Deploy the model to an endpoint
# create batch endpoint
az ml batch-endpoint create --name $endpoint_name $workspace_info || {
    echo "endpoint create failed"; exit 1;
}

# deploy model from registry to endpoint in workspace
az ml batch-deployment create --file ./deploy-batch.yaml $workspace_info --set \
    endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
    compute=$deployment_compute \
    name=$deployment_name || {
    echo "deployment create failed"; exit 1;
}

# 5.1 Try a scoring request with image folder

# Check if scoring folder exists
if [ -d $data_path ]; then
    echo -e "Invoking endpoint $endpoint_name with the following input:\n\n"
    ls $data_path
    echo -e "\n\n"
else
    echo "Scoring folder $data_path does not exist"
    exit 1
fi

# invoke the endpoint
folder_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \
    --deployment-name $deployment_name --input $sample_request_folder --input-type \
    uri_folder $workspace_info --query name --output tsv) || {
    echo "endpoint invoke failed"; exit 1;
}

# wait for the job to complete
az ml job stream --name $folder_inference_job $workspace_info || {
    echo "job stream failed"; exit 1;
}

# 5.2 Try a scoring request with csv file
# Note: if the job fails with "Assertion Error (The actual length exceeded max length 100 MB)",
# retry with fewer input images or use the ImageFolder input mode.

# Check if scoring data file exists
if [ -f $sample_request_csv ]; then
    echo -e "Invoking endpoint $endpoint_name with the following input:\n\n"
    echo -e "\n\n"
else
    echo "Scoring file $sample_request_csv does not exist"
    exit 1
fi

# invoke the endpoint
csv_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \
    --deployment-name $deployment_name --input $sample_request_csv --input-type \
    uri_file $workspace_info --query name --output tsv) || {
    echo "endpoint invoke failed"; exit 1;
}

# wait for the job to complete
az ml job stream --name $csv_inference_job $workspace_info || {
    echo "job stream failed"; exit 1;
}

# 6. Delete the endpoint
# Batch endpoints use compute resources only when jobs are submitted. You can keep the
# batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint.
# If you created your compute cluster to have zero minimum instances and scale down soon after being idle,
# you won't be charged for unused compute.
az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || {
    echo "endpoint delete failed"; exit 1;
}
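The SDK notebook linked at the top of the script drives the same invoke-and-stream flow from Python. A condensed sketch of the folder-mode request in step 5.1, reusing the paths the script prepares; the angle-bracket values are the same placeholders the script asks you to fill in:

from azure.ai.ml import Input, MLClient
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    DefaultAzureCredential(), "<SUBSCRIPTION_ID>", "<RESOURCE_GROUP>", "<WORKSPACE_NAME>"
)

# Folder-mode scoring request, mirroring `az ml batch-endpoint invoke ... --input-type uri_folder`.
job = ml_client.batch_endpoints.invoke(
    endpoint_name="<ENDPOINT_NAME>",  # e.g. image-classification-<timestamp>
    deployment_name="<DEPLOYMENT_NAME>",  # e.g. demo-<timestamp>
    input=Input(type="uri_folder", path="./data_batch/fridgeObjects"),
)

# Equivalent of `az ml job stream`.
ml_client.jobs.stream(job.name)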