vision batch deployment cli and sdk notebook #2433

Merged (5 commits) on Jul 10, 2023
@@ -22,7 +22,7 @@ huggingface_model_name="microsoft/beit-base-patch16-224-pt22k-ft22k"
# This is the foundation model for finetuning from azureml system registry
# using the latest version of the model - not working yet
aml_registry_model_name="microsoft-beit-base-patch16-224-pt22k-ft22k"
model_version=1
model_label="latest"

version=$(date +%s)
finetuned_huggingface_model_name="microsoft-beit-base-patch16-224-pt22k-ft22k-fridge-objects-multiclass-classification"
@@ -120,12 +120,15 @@ fi

# 3. Check if the model exists in the registry
# need to confirm model show command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $aml_registry_model_name --version $model_version --registry-name $registry_name
if ! az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name
then
echo "Model $aml_registry_model_name:$model_version does not exist in registry $registry_name"
echo "Model $aml_registry_model_name:$model_label does not exist in registry $registry_name"
exit 1
fi

# get the latest model version
model_version=$(az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name --query version --output tsv)
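# Illustrative sketch (not part of the original script): the resolved version is usually
# combined with the registry and model name into the registry asset URI that later
# finetuning and deployment steps reference.
foundation_model="azureml://registries/$registry_name/models/$aml_registry_model_name/versions/$model_version"
echo "Foundation model asset: $foundation_model"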

# 4. Prepare data
python prepare_data.py
# training data
@@ -40,9 +40,7 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):

# We'll copy each JSONL file within its related MLTable folder
training_mltable_path = os.path.join(dataset_parent_dir, "training-mltable-folder")
validation_mltable_path = os.path.join(
dataset_parent_dir, "validation-mltable-folder"
)
validation_mltable_path = os.path.join(dataset_parent_dir, "validation-mltable-folder")

# Create MLTable folders, if they don't exist
os.makedirs(training_mltable_path, exist_ok=True)
@@ -51,12 +49,8 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
train_validation_ratio = 5

# Path to the training and validation files
train_annotations_file = os.path.join(
training_mltable_path, "train_annotations.jsonl"
)
validation_annotations_file = os.path.join(
validation_mltable_path, "validation_annotations.jsonl"
)
train_annotations_file = os.path.join(training_mltable_path, "train_annotations.jsonl")
validation_annotations_file = os.path.join(validation_mltable_path, "validation_annotations.jsonl")

# Baseline of json line dictionary
json_line_sample = {"image_url": uri_folder_data_path, "label": ""}
@@ -87,20 +81,15 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
print("done")

# Create and save train mltable
train_mltable_file_contents = create_ml_table_file(
os.path.basename(train_annotations_file)
)
train_mltable_file_contents = create_ml_table_file(os.path.basename(train_annotations_file))
save_ml_table_file(training_mltable_path, train_mltable_file_contents)

# Create and save validation mltable
validation_mltable_file_contents = create_ml_table_file(
os.path.basename(validation_annotations_file)
)
validation_mltable_file_contents = create_ml_table_file(os.path.basename(validation_annotations_file))
save_ml_table_file(validation_mltable_path, validation_mltable_file_contents)
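# For reference, a sketch of what each generated MLTable file is expected to contain,
# assuming create_ml_table_file emits the usual AutoML-image layout (shown for
# illustration only; the helper's exact output is defined elsewhere in this repo):
#
#   paths:
#     - file: ./train_annotations.jsonl
#   transformations:
#     - read_json_lines:
#         encoding: utf8
#         invalid_lines: error
#         include_path_column: false
#     - convert_column_types:
#         - columns: image_url
#           column_type: stream_info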


def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):

# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

@@ -142,9 +131,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
print("")
print("Path to folder in Blob Storage:")
print(uri_folder_data_asset.path)
create_jsonl_and_mltable_files(
uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir
)
create_jsonl_and_mltable_files(uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir)


def read_image(image_path):
@@ -153,16 +140,12 @@ def read_image(image_path):


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Prepare data for image classification"
)
parser = argparse.ArgumentParser(description="Prepare data for image classification")

parser.add_argument("--subscription", type=str, help="Subscription ID")
parser.add_argument("--resource_group", type=str, help="Resource group name")
parser.add_argument("--workspace", type=str, help="Workspace name")
parser.add_argument(
"--data_path", type=str, default="./data", help="Dataset location"
)
parser.add_argument("--data_path", type=str, default="./data", help="Dataset location")

args, unknown = parser.parse_known_args()
args_dict = vars(args)
@@ -178,9 +161,7 @@ def read_image(image_path):
workspace = args.workspace
ml_client = MLClient(credential, subscription_id, resource_group, workspace)

upload_data_and_create_jsonl_mltable_files(
ml_client=ml_client, dataset_parent_dir=args.data_path
)
upload_data_and_create_jsonl_mltable_files(ml_client=ml_client, dataset_parent_dir=args.data_path)

sample_image = os.path.join(args.data_path, "fridgeObjects", "milk_bottle", "99.jpg")
huggingface_request_json = {
@@ -22,7 +22,7 @@ huggingface_model_name="microsoft/beit-base-patch16-224-pt22k-ft22k"
# This is the foundation model for finetuning from azureml system registry
# using the latest version of the model - not working yet
aml_registry_model_name="microsoft-beit-base-patch16-224-pt22k-ft22k"
model_version=1
model_label="latest"

version=$(date +%s)
finetuned_huggingface_model_name="microsoft-beit-base-patch16-224-pt22k-ft22k-fridge-objects-multilabel-classification"
@@ -119,11 +119,15 @@ fi

# 3. Check if the model exists in the registry
# need to confirm model show command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $aml_registry_model_name --version $model_version --registry-name $registry_name
if ! az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name
then
echo "Model $aml_registry_model_name:$model_version does not exist in registry $registry_name"
echo "Model $aml_registry_model_name:$model_label does not exist in registry $registry_name"
exit 1
fi

# get the latest model version
model_version=$(az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name --query version --output tsv)
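# Illustrative guard (not part of the original script): fail fast if the label lookup
# returned an empty version, so later steps do not run against a missing model.
if [[ -z "$model_version" ]]; then
    echo "Could not resolve a version for $aml_registry_model_name with label $model_label in registry $registry_name"
    exit 1
fi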

# 4. Prepare data
python prepare_data.py
# training data
@@ -40,9 +40,7 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):

# We'll copy each JSONL file within its related MLTable folder
training_mltable_path = os.path.join(dataset_parent_dir, "training-mltable-folder")
validation_mltable_path = os.path.join(
dataset_parent_dir, "validation-mltable-folder"
)
validation_mltable_path = os.path.join(dataset_parent_dir, "validation-mltable-folder")

# Create MLTable folders, if they don't exist
os.makedirs(training_mltable_path, exist_ok=True)
@@ -51,12 +49,8 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
train_validation_ratio = 5

# Path to the training and validation files
train_annotations_file = os.path.join(
training_mltable_path, "train_annotations.jsonl"
)
validation_annotations_file = os.path.join(
validation_mltable_path, "validation_annotations.jsonl"
)
train_annotations_file = os.path.join(training_mltable_path, "train_annotations.jsonl")
validation_annotations_file = os.path.join(validation_mltable_path, "validation_annotations.jsonl")

# Path to the labels file.
label_file = os.path.join(dataset_dir, "labels.csv")
@@ -90,26 +84,23 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
print("done")

# Create and save train mltable
train_mltable_file_contents = create_ml_table_file(
os.path.basename(train_annotations_file)
)
train_mltable_file_contents = create_ml_table_file(os.path.basename(train_annotations_file))
save_ml_table_file(training_mltable_path, train_mltable_file_contents)

# Create and save validation mltable
validation_mltable_file_contents = create_ml_table_file(
os.path.basename(validation_annotations_file)
)
validation_mltable_file_contents = create_ml_table_file(os.path.basename(validation_annotations_file))
save_ml_table_file(validation_mltable_path, validation_mltable_file_contents)


def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):

# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
print("Downloading data.")
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip"
download_url = (
"https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
@@ -145,9 +136,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
print("")
print("Path to folder in Blob Storage:")
print(uri_folder_data_asset.path)
create_jsonl_and_mltable_files(
uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir
)
create_jsonl_and_mltable_files(uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir)


def read_image(image_path):
@@ -156,16 +145,12 @@ def read_image(image_path):


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Prepare data for image classification"
)
parser = argparse.ArgumentParser(description="Prepare data for image classification")

parser.add_argument("--subscription", type=str, help="Subscription ID")
parser.add_argument("--group", type=str, help="Resource group name")
parser.add_argument("--workspace", type=str, help="Workspace name")
parser.add_argument(
"--data_path", type=str, default="./data", help="Dataset location"
)
parser.add_argument("--data_path", type=str, default="./data", help="Dataset location")

args, unknown = parser.parse_known_args()
args_dict = vars(args)
@@ -181,9 +166,7 @@ def read_image(image_path):
workspace = args.workspace
ml_client = MLClient(credential, subscription_id, resource_group, workspace)

upload_data_and_create_jsonl_mltable_files(
ml_client=ml_client, dataset_parent_dir=args.data_path
)
upload_data_and_create_jsonl_mltable_files(ml_client=ml_client, dataset_parent_dir=args.data_path)

sample_image = os.path.join(args.data_path, "multilabelFridgeObjects", "images", "56.jpg")
huggingface_request_json = {
@@ -0,0 +1,9 @@
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
name: demo
description: "Batch endpoint for for image-classification task"
type: model
resources:
instance_count: 1
settings:
mini_batch_size: 1
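The new deployment spec above does not pin an endpoint or model; those are typically supplied when the deployment is created. A hedged sketch of how such a spec might be applied and then invoked with the CLI (file, endpoint, model, and input names below are placeholders, not taken from this PR):

az ml batch-deployment create --file <path-to-this-yaml> --endpoint-name <endpoint-name> \
  --set model="azureml:<registered-model-name>:<version>"
az ml batch-endpoint invoke --name <endpoint-name> --deployment-name demo --input <path-to-image-folder-or-mltable>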

@@ -2,5 +2,11 @@ $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.sch
name: demo
instance_type: Standard_DS3_v2
instance_count: 1
liveness_probe:
initial_delay: 180
period: 180
failure_threshold: 49
timeout: 299
request_settings:
request_timeout_ms: 60000
request_timeout_ms: 60000
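The added liveness_probe settings and the 60-second request timeout give the scoring container more headroom to start up and to answer each request. A sketch of how a spec like this is typically applied (a hedged example; the file and endpoint names are placeholders):

az ml online-deployment create --file <path-to-this-yaml> --endpoint-name <endpoint-name> --all-traffic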
