Skip to content

Commit

Permalink
Vision Datasets location Update (#3289)
Browse files Browse the repository at this point in the history
* adding continue_on_step_failure False and force_rerun True for vision pipelines

* adding continue_on_step_failure False and force_rerun True for vision pipelines

* adding continue_on_step_failure False and force_rerun True for vision pipelines

* update datasets location

* Replace remaining references to multiclass classification data.

* Replace remaining references to multilabel classification data.

* Replace remaining references to object detection data.

* Replace remaining references to instance segmentation data.

* Patch .zip url.

* Install numpy version compatible with scikit-image==0.19.3.

* Use different names for data sources to prevent job failures due to the same name being used at the same time.

---------

Co-authored-by: Radu-Mihai Dondera <rdondera@microsoft.com>
  • Loading branch information
rjaincc and rdondera-microsoft authored Jul 31, 2024
1 parent eb698e9 commit a0e49f8
Show file tree
Hide file tree
Showing 55 changed files with 124 additions and 118 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):

# Download data
print("Downloading data.")
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-classification/fridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
Expand All @@ -132,7 +132,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
path=dataset_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images",
name="fridge-items-images-2",
name="fridge-items-images-mc-ft",
)

uri_folder_data_asset = ml_client.data.create_or_update(my_data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):

# Download data
print("Downloading data.")
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-classification/multilabelFridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
Expand All @@ -135,7 +135,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
path=dataset_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images",
name="fridge-items-images-2",
name="fridge-items-images-ml-ft",
)

uri_folder_data_asset = ml_client.data.create_or_update(my_data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
os.makedirs(dataset_parent_dir, exist_ok=True)

# Download data
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-instance-segmentation/odFridgeObjectsMask.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
Expand All @@ -184,7 +184,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
path=dataset_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images instance segmentation",
name="fridge-items-images-instance-segmentation",
name="fridge-items-images-is-ft",
)

uri_folder_data_asset = ml_client.data.create_or_update(my_data)
Expand All @@ -198,6 +198,8 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
subprocess.check_call(
[sys.executable, "-m", "pip", "install", "scikit-image==0.19.3"]
)
# Install numpy version compatible with scikit-image==0.19.3.
subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy==1.26.4"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "simplification"])
print("done")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-object-detection/odFridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
Expand All @@ -187,7 +187,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
path=dataset_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images Object detection",
name="fridge-items-images-object-detection",
name="fridge-items-images-od-ft",
)

uri_folder_data_asset = ml_client.data.create_or_update(my_data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> N

# download data
if is_multilabel_dataset == 0:
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-classification/fridgeObjects.zip"
else:
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-classification/multilabelFridgeObjects.zip"
print(f"Downloading data from {download_url}")

# Extract current dataset name from dataset url
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-classification/fridgeObjects.zip"
print(f"Downloading data from {download_url}")

# Extract current dataset name from dataset url
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def download_and_unzip(dataset_parent_dir: str) -> None:

# download data

download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-instance-segmentation/odFridgeObjectsMask.zip"
print(f"Downloading data from {download_url}")

# Extract current dataset name from dataset url
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def download_and_unzip(dataset_parent_dir: str) -> None:

# download data

download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-object-detection/odFridgeObjects.zip"
print(f"Downloading data from {download_url}")

# Extract current dataset name from dataset url
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-classification/fridgeObjects.zip"
print(f"Downloading data from {download_url}")

# Extract current dataset name from dataset url
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-object-detection/odFridgeObjects.zip"
print(f"Downloading data from {download_url}")

# Extract current dataset name from dataset url
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def download_and_unzip(dataset_parent_dir: str) -> None:

# download data

download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-object-detection/odFridgeObjects.zip"
print(f"Downloading data from {download_url}")

# Extract current dataset name from dataset url
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-object-detection/odFridgeObjects.zip"
print(f"Downloading data from {download_url}")

# Extract current dataset name from dataset url
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-classification/fridgeObjects.zip"
print(f"Downloading data from {download_url}")

# Extract current dataset name from dataset url
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):

# download data
print("Downloading data.")
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-classification/fridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
Expand All @@ -132,7 +132,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
path=dataset_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images",
name="fridge-items-images-2",
name="fridge-items-images-mc",
)

uri_folder_data_asset = ml_client.data.create_or_update(my_data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):

# download data
print("Downloading data.")
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-classification/multilabelFridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
Expand All @@ -135,7 +135,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
path=dataset_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images",
name="fridge-items-images-2",
name="fridge-items-images-ml",
)

uri_folder_data_asset = ml_client.data.create_or_update(my_data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-instance-segmentation/odFridgeObjectsMask.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
Expand All @@ -67,7 +67,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
path=dataset_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images instance segmentation",
name="fridge-items-images-instance-segmentation",
name="fridge-items-images-is",
)

uri_folder_data_asset = ml_client.data.create_or_update(my_data)
Expand All @@ -81,6 +81,8 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
subprocess.check_call(
[sys.executable, "-m", "pip", "install", "scikit-image==0.19.3"]
)
# Install numpy version compatible with scikit-image==0.19.3.
subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy==1.26.4"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "simplification"])
print("done")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-object-detection/odFridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
Expand All @@ -170,7 +170,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
path=dataset_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images Object detection",
name="fridge-items-images-object-detection",
name="fridge-items-images-od",
)

uri_folder_data_asset = ml_client.data.create_or_update(my_data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-instance-segmentation/odFridgeObjectsMask.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
Expand All @@ -67,7 +67,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
path=dataset_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images instance segmentation",
name="fridge-items-images-instance-segmentation",
name="fridge-items-images-is-p",
)

uri_folder_data_asset = ml_client.data.create_or_update(my_data)
Expand All @@ -81,6 +81,8 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
subprocess.check_call(
[sys.executable, "-m", "pip", "install", "scikit-image==0.19.3"]
)
# Install numpy version compatible with scikit-image==0.19.3.
subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy==1.26.4"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "simplification"])
print("done")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):

# download data
print("Downloading data.")
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-classification/fridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
Expand All @@ -132,7 +132,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
path=dataset_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images",
name="fridge-items-images-2",
name="fridge-items-images-mc-p",
)

uri_folder_data_asset = ml_client.data.create_or_update(my_data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):

# download data
print("Downloading data.")
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-classification/multilabelFridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
Expand All @@ -135,7 +135,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
path=dataset_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images",
name="fridge-items-images-2",
name="fridge-items-images-ml-p",
)

uri_folder_data_asset = ml_client.data.create_or_update(my_data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip"
download_url = "https://automlsamplenotebookdata.blob.core.windows.net/image-object-detection/odFridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
Expand All @@ -170,7 +170,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
path=dataset_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images Object detection",
name="fridge-items-images-object-detection",
name="fridge-items-images-od-p",
)

uri_folder_data_asset = ml_client.data.create_or_update(my_data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"This sample shows how to evaluate a group of models against a given set of metrics for the `image-classification` task. \n",
"\n",
"### Evaluation dataset\n",
"We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip) dataset.\n",
"We will use the [fridgeObjects](https://automlsamplenotebookdata.blob.core.windows.net/image-classification/fridgeObjects.zip) dataset.\n",
"\n",
"### Model\n",
"The goal of evaluating models is to compare their performance on a variety of metrics. `image-classification` is a generic task type. As such, the models you pick to compare must be finetuned for the same scenario. Given that we have the dataset, we would like to look for models finetuned for this specific scenario. We will compare `microsoft-beit-base-patch16-224-pt22k-ft22k` and `microsoft-swinv2-base-patch4-window12-192-22k` in this sample, which are available in the `azureml` system registry.\n",
Expand Down Expand Up @@ -258,7 +258,7 @@
"source": [
"### 4. Prepare the dataset for fine-tuning the model\n",
"\n",
"We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip) dataset. The fridge object dataset is stored in a directory. There are four different folders inside:\n",
"We will use the [fridgeObjects](https://automlsamplenotebookdata.blob.core.windows.net/image-classification/fridgeObjects.zip) dataset. The fridge object dataset is stored in a directory. There are four different folders inside:\n",
"- /water_bottle\n",
"- /milk_bottle\n",
"- /carton\n",
Expand Down Expand Up @@ -288,7 +288,7 @@
"os.makedirs(dataset_parent_dir, exist_ok=True)\n",
"\n",
"# download data\n",
"download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip\"\n",
"download_url = \"https://automlsamplenotebookdata.blob.core.windows.net/image-classification/fridgeObjects.zip\"\n",
"\n",
"# Extract current dataset name from dataset url\n",
"dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@
"os.makedirs(dataset_parent_dir, exist_ok=True)\n",
"\n",
"# download data\n",
"download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip\"\n",
"download_url = \"https://automlsamplenotebookdata.blob.core.windows.net/image-classification/multilabelFridgeObjects.zip\"\n",
"\n",
"# Extract current dataset name from dataset url\n",
"dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n",
Expand Down
Loading

0 comments on commit a0e49f8

Please sign in to comment.