This repository has been archived by the owner on Nov 16, 2023. It is now read-only.

Azureml train pipeline #186

Closed
wants to merge 14 commits
@@ -0,0 +1,13 @@
git+https://github.com/olgaliak/seismic-deeplearning.git@azureml-train-pipeline#egg=cv_lib&subdirectory=cv_lib
Contributor:

Please pull from /microsoft. Also, is there a way to specify the master branch here? We're on staging by default, which might break one of these days.

Collaborator Author:
Unfortunately, doing that would require the init.py file that I added, so this wouldn't work until that file is added to /microsoft and /master (there is a way to specify the master branch). I have a note in the PR comment about changing it to microsoft/staging before the merge. Just leaving it for now so it can be tested.
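(For reference, pip's VCS requirement syntax allows pinning a branch, tag, or commit with an `@<ref>` suffix, e.g. `git+https://github.com/microsoft/seismic-deeplearning.git@master#egg=cv_lib&subdirectory=cv_lib`; which org and branch to pin is still the open question above.)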

git+https://github.com/microsoft/seismic-deeplearning.git#egg=deepseismic-interpretation&subdirectory=interpretation
opencv-python==4.1.2.30
Contributor:

Is it possible to use Pillow for your work instead of OpenCV?

Collaborator:

I think we are only using OpenCV for the BORDER_CONSTANT feature to pad images. This is used in the train.py that we are leveraging and in other areas of the code. Are you recommending we substitute similar functionality from Pillow?
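As a hedged illustration of the padding being discussed (not necessarily how train.py does it), constant-border padding in OpenCV and a rough Pillow equivalent look like this:

```python
import cv2
import numpy as np
from PIL import Image, ImageOps

img = np.zeros((99, 99), dtype=np.uint8)

# OpenCV: add a 1-pixel constant-valued border on the bottom and right edges
padded_cv = cv2.copyMakeBorder(img, 0, 1, 0, 1, borderType=cv2.BORDER_CONSTANT, value=255)

# Pillow: ImageOps.expand adds a constant-colour border on all four sides
padded_pil = ImageOps.expand(Image.fromarray(img), border=1, fill=255)
```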

numpy>=1.17.0
torch==1.4.0
pytorch-ignite==0.3.0.dev20191105 # pre-release until stable available
fire==0.2.1
albumentations==0.4.3
toolz==0.10.0
segyio==1.8.8
scipy==1.1.0
gitpython==3.0.5
yacs==0.1.6
22 changes: 16 additions & 6 deletions experiments/interpretation/dutchf3_patch/local/train.py
@@ -28,6 +28,7 @@
from ignite.utils import convert_tensor
from toolz import compose
from torch.utils import data
from git import InvalidGitRepositoryError

from deepseismic_interpretation.dutchf3.data import get_patch_loader, decode_segmap
from cv_lib.utils import load_log_configuration
@@ -76,7 +77,7 @@ def prepare_batch(batch, device=None, non_blocking=False):
)


def run(*options, cfg=None, debug=False):
def run(*options, cfg=None, debug=False, input=None):
"""Run training and validation of model

Notes:
@@ -90,13 +91,22 @@ def run(*options, cfg=None, debug=False):
default.py
cfg (str, optional): Location of config file to load. Defaults to None.
debug (bool): Places scripts in debug/test mode and only executes a few iterations
input (str, optional): Location of data if Azure ML run,
for local runs input is config.DATASET.ROOT
"""

update_config(config, options=options, config_file=cfg)

# we will write the model under outputs / config_file_name / model_dir
config_file_name = "default_config" if not cfg else cfg.split("/")[-1].split(".")[0]

data_dir = config.DATASET.ROOT
output_dir = config.OUTPUT_DIR

if input is not None:
data_dir = input
output_dir = data_dir + output_dir

# Start logging
load_log_configuration(config.LOG_CONFIG)
logger = logging.getLogger(__name__)
@@ -141,7 +151,7 @@ def run(*options, cfg=None, debug=False):
TrainPatchLoader = get_patch_loader(config)

train_set = TrainPatchLoader(
config.DATASET.ROOT,
data_dir,
split="train",
is_transform=True,
stride=config.TRAIN.STRIDE,
@@ -150,7 +160,7 @@
)
logger.info(train_set)
val_set = TrainPatchLoader(
config.DATASET.ROOT,
data_dir,
split="val",
is_transform=True,
stride=config.TRAIN.STRIDE,
@@ -180,9 +190,9 @@
)

try:
output_dir = generate_path(config.OUTPUT_DIR, git_branch(), git_hash(), config_file_name, config.TRAIN.MODEL_DIR, current_datetime(),)
except TypeError:
output_dir = generate_path(config.OUTPUT_DIR, config_file_name, config.TRAIN.MODEL_DIR, current_datetime(),)
output_dir = generate_path(output_dir, git_branch(), git_hash(), config_file_name, config.TRAIN.MODEL_DIR, current_datetime(),)
except (TypeError, InvalidGitRepositoryError):
output_dir = generate_path(output_dir, config_file_name, config.TRAIN.MODEL_DIR, current_datetime(),)

summary_writer = create_summary_writer(log_dir=path.join(output_dir, config.LOG_DIR))

175 changes: 175 additions & 0 deletions interpretation/deepseismic_interpretation/azureml_pipelines/README.md
@@ -0,0 +1,175 @@
# Integrating with AzureML
Contributor:

Could you please also add some verbiage to the main README and point to this file? The AML team would love this!


## Running a Pipeline in AzureML
Set the following environment variables:
```
BLOB_ACCOUNT_NAME
BLOB_CONTAINER_NAME
BLOB_ACCOUNT_KEY
BLOB_SUB_ID
AML_COMPUTE_CLUSTER_NAME
AML_COMPUTE_CLUSTER_MIN_NODES
AML_COMPUTE_CLUSTER_MAX_NODES
AML_COMPUTE_CLUSTER_SKU
```
Contributor:

Please use https://pypi.org/project/python-dotenv/ in the code and then ask the user to create a .env file where these will reside. We don't want to set these in the notebook.

Collaborator Author:

This does use python-dotenv to grab the variables. This is just the README instructing users to set those variables however they choose (a .env file for VS Code is mentioned).

Collaborator:

@annazietlow The DeepSeismic team has requested that this PR go to a contrib branch, so I've closed this one and we are continuing the conversation on PR #195.

On Windows you can use:
`set VARIABLE=value`
Contributor:

We don't support Windows :-p please feel free to get rid of this.

On Linux:
`export VARIABLE=value`
These can be set automatically in VS Code via an .env file, loaded with `source .env` on Linux, or made into a `.bat` file to run easily from the command line on Windows. You can ask a team member for a .env file configured for our development environment to save time.
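A minimal sketch of how these variables might be read with python-dotenv (the variable names match the list above; the .env file location is an assumption):

```python
# Assumes a .env file in the working directory with lines such as:
#   BLOB_ACCOUNT_NAME=<storage account>
#   AML_COMPUTE_CLUSTER_NAME=<cluster name>
import os
from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv()  # reads .env into os.environ without overriding variables that are already set
blob_account = os.environ["BLOB_ACCOUNT_NAME"]
cluster_name = os.environ["AML_COMPUTE_CLUSTER_NAME"]
```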

Create a .azureml/config.json file in the project's root directory that looks like so:
```json
{
"subscription_id": "<subscription id>",
"resource_group": "<resource group>",
"workspace_name": "<workspace name>"
}

```
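With that file in place, the AzureML SDK can locate the workspace without hard-coding any IDs; a minimal sketch, assuming the azureml-sdk is installed:

```python
from azureml.core import Workspace

# from_config() searches the current directory and its parents for .azureml/config.json
ws = Workspace.from_config()
print(ws.name, ws.resource_group)
```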

## Training Pipeline
Here's an example of a possible config file:
```json
{
"step1":
{
"type": "PythonScriptStep",
"name": "process all files step",
"script": "process_all_files.py",
"input_datareference_path": "",
"input_datareference_name": "raw_input_data",
"input_dataset_name": "raw_input_data",
"source_directory": "src/first_preprocess/",
"arguments": ["--remote_run",
"--input_path", "input/",
"--output_path", "normalized_data"],
"requirements": "src/first_preprocess/preprocess_requirements.txt",
"node_count": 1,
"processes_per_node": 1
},
"step2":
{
"type": "PythonScriptStep",
"name": "prepare files step",
"script": "prepare_files.py",
"input_datareference_path": "normalized_data/",
"input_datareference_name": "normalized_data_conditioned",
"input_dataset_name": "normalizeddataconditioned",
"source_directory": "src/second_preprocess",
"arguments": ["split_train_val",
"patch",
"--label_file", "label.npy",
"--output_dir", "splits/",
"--stride=25",
"--patch=100.",
"--log_config", "configs/logging.conf"],
"requirements": "src/second_preprocess/prepare_files_requirements.txt",
"node_count": 1,
"processes_per_node": 1,
"base_image": "pytorch/pytorch"
},
"step3":
{
"type": "MpiStep",
"name": "train step",
"script": "train.py",
"input_datareference_path": "normalized_data/",
"input_datareference_name": "normalized_data_conditioned",
"input_dataset_name": "normalizeddataconditioned",
"source_directory": "train/",
"arguments": ["--splits", "splits",

Should the example mention how the input/output params could be passed in?

Collaborator Author:

It adds more detail to the params around line 111. Do you mean more than that?

"--train_data_paths", "normalized_data/file.npy",
"--label_paths", "label.npy"],
"requirements": "train/requirements.txt",
"node_count": 1,
"processes_per_node": 1,
"base_image": "pytorch/pytorch"
}
}
```

If you want to create a train pipeline, make sure that:
1) All of your steps are isolated
- Your scripts will need to conform to the interface you define in the config file, i.e., if step1 is expected to output X and step2 expects X as an input, your scripts need to reflect that
- If one of your steps has pip package dependencies, make sure they are specified in a requirements.txt file
- If your script has local dependencies (i.e., it imports from another script), make sure that all dependencies fall underneath the source_directory
2) You have configured your config file to specify the steps needed (see the "Configuring a Pipeline" section below for guidance)

Note: the following arguments are automatically added to any script step by AzureML:
```--input_data``` and ```--output``` (the latter only if an output is specified in the pipeline_config.json).
Make sure to add these arguments to your scripts like so:
```python
parser.add_argument('--input_data', type=str, help='path to preprocessed data')
parser.add_argument('--output', type=str, help='output from training')
```
```input_data``` is the absolute path to the input_datareference_path on the blob you specified.
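A hypothetical step script skeleton that consumes these automatically added arguments might look like the following (everything beyond `--input_data`/`--output` is illustrative, not from this repo):

```python
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument('--input_data', type=str, help='path to preprocessed data')
parser.add_argument('--output', type=str, help='output from training')
args = parser.parse_args()

# --input_data resolves to the mounted input_datareference_path on the blob store
print("reading inputs from", args.input_data)

# anything written under --output is tracked as this step's output in AzureML
if args.output:
    os.makedirs(args.output, exist_ok=True)
    with open(os.path.join(args.output, "done.txt"), "w") as f:
        f.write("step finished")
```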

# Configuring a Pipeline

## Train Pipeline
Define parameters for the run in a config file. See an example [here](../pipeline_config.json).
```json
{
"step1":
{
"type": "<type of step. Supported types include PythonScriptStep and MpiStep>",
"name": "<name in AzureML for this step>",
"script": "<path to script for this step>",
"output": "<name of the output in AzureML for this step - optional>",
"input_datareference_path": "<path on the data reference for the input data - optional>",
"input_datareference_name": "<name of the data reference in AzureML where the input data lives - optional>",
"input_dataset_name": "<name of the datastore in AzureML - optional>",
"source_directory": "<source directory containing the files for this step>",
"arguments": "<arguments to pass to the script - optional>",
"requirements": "<path to the requirements.txt file for the step - optional>",
"node_count": "<number of nodes to run the script on - optional>",
"processes_per_node": "<number of processes to run on each node - optional>",
"base_image": "<name of an image registered on dockerhub that you want to use as your base image"
},

"step2":
{
.
.
.
}
}
```

## Kicking off a Pipeline
In order to kick off a pipeline, you will need to use the Azure CLI to log in to the subscription where your workspace resides:
```bash
az login
az account set -s <subscription id>
```
Kick off the training pipeline defined in your config via your Python environment of choice. The code will look like this:
```python
from src.azml.train_pipeline.train_pipeline import TrainPipeline

orchestrator = TrainPipeline("<path to your config file>")
orchestrator.construct_pipeline()
run = orchestrator.run_pipeline(experiment_name="DEV-train-pipeline")
```
See an example in [dev/kickoff_train_pipeline.py](dev/kickoff_train_pipeline.py)
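Assuming `run_pipeline` returns an AzureML run object (e.g. a `PipelineRun`), you can optionally block until the pipeline finishes and stream its logs:

```python
run.wait_for_completion(show_output=True)
```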

If this fails due to access issues with the Azure ML subscription, you may be able to connect using the following workaround.
Go to [base_pipeline.py](../base_pipeline.py) and add the following import:
```python
from azureml.core.authentication import AzureCliAuthentication
```
Then find the code where we connect to the workspace which looks like this:
```python
self.ws = Workspace.from_config(path=ws_config)
```
and replace it with this:
```python
cli_auth = AzureCliAuthentication()
self.ws = Workspace(subscription_id=<subscription id>, resource_group=<resource group>, workspace_name=<workspace name>, auth=cli_auth)
```
To get this to run, you will also need to `pip install azure-cli-core`.
Then you can go back and follow the instructions above, including `az login` and setting the subscription, and kick off the pipeline.

## Cancelling a Pipeline Run
If you kicked off a pipeline and want to cancel it, run the [cancel_run.py](dev/cancel_run.py) script with the corresponding run_id and step_id.
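If you prefer to cancel a whole pipeline run from the SDK instead, a minimal sketch (the experiment name here is an assumption, and cancel_run.py itself may work differently):

```python
from azureml.core import Experiment, Workspace
from azureml.pipeline.core import PipelineRun

ws = Workspace.from_config()
experiment = Experiment(ws, "DEV-train-pipeline")

# cancel the whole pipeline run by its run id
PipelineRun(experiment, run_id="<run id>").cancel()
```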