From 73044ae051692e7ef7b72c21b370e8347d83ace6 Mon Sep 17 00:00:00 2001 From: liuzhe Date: Thu, 3 Jun 2021 11:47:09 +0800 Subject: [PATCH 1/7] port examples to v2 and fix bugs --- examples/trials/auto-gbdt/config.yml | 23 +- examples/trials/auto-gbdt/config_metis.yml | 35 +-- examples/trials/auto-gbdt/config_pai.yml | 35 --- examples/trials/auto-gbdt/search_space.json | 22 +- .../trials/auto-gbdt/search_space_metis.json | 5 - .../hyperband/config_hyperband.yml | 29 +-- examples/trials/cifar10_pytorch/config.yml | 29 +-- .../trials/cifar10_pytorch/config_pai.yml | 35 --- .../trials/cifar10_pytorch/search_space.json | 15 +- examples/trials/efficientnet/config.yml | 15 ++ examples/trials/efficientnet/config_local.yml | 18 -- examples/trials/efficientnet/config_pai.yml | 31 --- examples/trials/efficientnet/tuner.py | 2 +- examples/trials/ga_squad/config.yml | 24 +- examples/trials/ga_squad/config_pai.yml | 35 --- examples/trials/kaggle-tgs-salt/config.yml | 23 +- examples/trials/mnist-advisor/config_bohb.yml | 27 +-- .../trials/mnist-advisor/config_hyperband.yml | 37 +-- examples/trials/mnist-advisor/config_pai.yml | 41 ---- examples/trials/mnist-annotation/config.yml | 24 +- .../trials/mnist-annotation/config_gpu.yml | 20 -- .../mnist-annotation/config_kubeflow.yml | 31 --- .../trials/mnist-annotation/config_pai.yml | 34 --- .../trials/mnist-annotation/config_remote.yml | 33 --- .../trials/mnist-batch-tune-keras/config.yml | 24 +- .../mnist-batch-tune-keras/config_pai.yml | 32 --- .../config_kubeflow.yml | 0 .../dist_mnist.py | 0 .../search_space.json | 0 examples/trials/mnist-keras/config.yml | 21 -- examples/trials/mnist-keras/config_pai.yml | 35 --- examples/trials/mnist-keras/mnist-keras.py | 137 ----------- examples/trials/mnist-keras/search_space.json | 4 - .../mnist-nested-search-space/config.yml | 26 +- .../search_space.json | 226 +++++++++--------- .../trials/mnist-pbt-tuner-pytorch/config.yml | 28 +-- examples/trials/mnist-pytorch/config.yml | 27 +-- examples/trials/mnist-pytorch/config_aml.yml | 32 +-- .../trials/mnist-pytorch/config_detailed.yml | 42 ++++ .../trials/mnist-pytorch/config_openpai.yml | 20 ++ examples/trials/mnist-pytorch/config_pai.yml | 35 --- .../trials/mnist-pytorch/config_remote.yml | 24 ++ .../mnist-pytorch/config_tensorboard.yml | 24 +- examples/trials/mnist-pytorch/config_v2.yml | 23 -- .../trials/mnist-pytorch/config_windows.yml | 21 -- .../trials/mnist-sharedstorage/config_nfs.yml | 2 +- examples/trials/mnist-tfv2/config.yml | 27 +-- .../trials/mnist-tfv2/config_assessor.yml | 31 +-- .../trials/mnist-tfv2/config_detailed.yml | 46 ++++ examples/trials/mnist-tfv2/config_remote.yml | 50 ++-- examples/trials/mnist-tfv2/config_v2.yml | 26 -- examples/trials/mnist-tfv2/config_windows.yml | 21 -- .../network_morphism/FashionMNIST/config.yml | 41 ++-- .../FashionMNIST/config_pai.yml | 42 ---- .../network_morphism/cifar10/config.yml | 41 ++-- .../network_morphism/cifar10/config_pai.yml | 42 ---- .../trials/sklearn/classification/config.yml | 23 +- .../sklearn/classification/config_pai.yml | 35 --- examples/trials/sklearn/regression/config.yml | 23 +- .../trials/sklearn/regression/config_pai.yml | 35 --- nni/experiment/config/base.py | 2 +- nni/experiment/config/common.py | 8 +- nni/experiment/config/local.py | 5 +- nni/experiment/config/remote.py | 4 +- nni/experiment/config/util.py | 7 + nni/tools/nnictl/launcher.py | 32 +-- nni/tools/package_utils/__init__.py | 2 +- 67 files changed, 564 insertions(+), 1385 deletions(-) delete mode 100644 
examples/trials/auto-gbdt/config_pai.yml delete mode 100644 examples/trials/auto-gbdt/search_space_metis.json delete mode 100644 examples/trials/cifar10_pytorch/config_pai.yml create mode 100644 examples/trials/efficientnet/config.yml delete mode 100644 examples/trials/efficientnet/config_local.yml delete mode 100644 examples/trials/efficientnet/config_pai.yml delete mode 100644 examples/trials/ga_squad/config_pai.yml delete mode 100644 examples/trials/mnist-advisor/config_pai.yml delete mode 100644 examples/trials/mnist-annotation/config_gpu.yml delete mode 100644 examples/trials/mnist-annotation/config_kubeflow.yml delete mode 100644 examples/trials/mnist-annotation/config_pai.yml delete mode 100644 examples/trials/mnist-annotation/config_remote.yml delete mode 100644 examples/trials/mnist-batch-tune-keras/config_pai.yml rename examples/trials/{mnist-distributed => mnist-distributed-tfv1}/config_kubeflow.yml (100%) rename examples/trials/{mnist-distributed => mnist-distributed-tfv1}/dist_mnist.py (100%) rename examples/trials/{mnist-distributed => mnist-distributed-tfv1}/search_space.json (100%) delete mode 100644 examples/trials/mnist-keras/config.yml delete mode 100644 examples/trials/mnist-keras/config_pai.yml delete mode 100644 examples/trials/mnist-keras/mnist-keras.py delete mode 100644 examples/trials/mnist-keras/search_space.json create mode 100644 examples/trials/mnist-pytorch/config_detailed.yml create mode 100644 examples/trials/mnist-pytorch/config_openpai.yml delete mode 100644 examples/trials/mnist-pytorch/config_pai.yml create mode 100644 examples/trials/mnist-pytorch/config_remote.yml delete mode 100644 examples/trials/mnist-pytorch/config_v2.yml delete mode 100644 examples/trials/mnist-pytorch/config_windows.yml create mode 100644 examples/trials/mnist-tfv2/config_detailed.yml delete mode 100644 examples/trials/mnist-tfv2/config_v2.yml delete mode 100644 examples/trials/mnist-tfv2/config_windows.yml delete mode 100644 examples/trials/network_morphism/FashionMNIST/config_pai.yml delete mode 100644 examples/trials/network_morphism/cifar10/config_pai.yml delete mode 100644 examples/trials/sklearn/classification/config_pai.yml delete mode 100644 examples/trials/sklearn/regression/config_pai.yml diff --git a/examples/trials/auto-gbdt/config.yml b/examples/trials/auto-gbdt/config.yml index 45b00a0994..38bdd5b80f 100644 --- a/examples/trials/auto-gbdt/config.yml +++ b/examples/trials/auto-gbdt/config.yml @@ -1,21 +1,10 @@ -authorName: default -experimentName: example_auto-gbdt +searchSpaceFile: search_space.json +trialCommand: python3 main.py trialConcurrency: 1 -maxExecDuration: 10h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: minimize -trial: - command: python3 main.py - codeDir: . 
- gpuNum: 0 \ No newline at end of file +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/auto-gbdt/config_metis.yml b/examples/trials/auto-gbdt/config_metis.yml index dd78272c9c..0999d8cb52 100644 --- a/examples/trials/auto-gbdt/config_metis.yml +++ b/examples/trials/auto-gbdt/config_metis.yml @@ -1,21 +1,22 @@ -authorName: default -experimentName: example_auto-gbdt-metis +# The search space of Metis tuner is slightly different from TPE and others. +# See Metis tuner' doc for details: https://nni.readthedocs.io/en/stable/Tuner/MetisTuner.html +searchSpace: + num_leaves: + _type: choice + _value: [31, 28, 24, 20] + learning_rate: + _type: choice + _value: [0.01, 0.05, 0.1, 0.2] + bagging_freq: + _type: choice + _value: [1, 2, 4, 8, 10] + +trialCommand: python3 main.py trialConcurrency: 1 -maxExecDuration: 10h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space_metis.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: MetisTuner + name: MetisTuner classArgs: - #choice: maximize, minimize optimize_mode: minimize -trial: - command: python3 main.py - codeDir: . - gpuNum: 0 +trainingService: + platform: local diff --git a/examples/trials/auto-gbdt/config_pai.yml b/examples/trials/auto-gbdt/config_pai.yml deleted file mode 100644 index 912971a0fa..0000000000 --- a/examples/trials/auto-gbdt/config_pai.yml +++ /dev/null @@ -1,35 +0,0 @@ -authorName: default -experimentName: example_auto-gbdt -trialConcurrency: 1 -maxExecDuration: 10h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: minimize -trial: - command: python3 main.py - codeDir: . 
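
# Both auto-gbdt configs above point NNI at "trialCommand: python3 main.py". A minimal,
# illustrative sketch of the trial-side API such a script relies on; the parameter names
# and the metric are placeholders, not the actual main.py shipped with this example.
import nni

params = {'num_leaves': 31, 'learning_rate': 0.05, 'bagging_fraction': 0.9, 'bagging_freq': 1}
params.update(nni.get_next_parameter() or {})   # hyper-parameters sampled from the search space

# ... train LightGBM with `params` here ...
rmse = 0.5                                      # placeholder metric for illustration
nni.report_final_result(rmse)                   # the value the tuner minimizes (optimize_mode: minimize)
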
- gpuNum: 0 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/auto-gbdt/search_space.json b/examples/trials/auto-gbdt/search_space.json index ea09eca9e7..e55aaa6b79 100644 --- a/examples/trials/auto-gbdt/search_space.json +++ b/examples/trials/auto-gbdt/search_space.json @@ -1,6 +1,18 @@ { - "num_leaves":{"_type":"randint","_value":[20, 31]}, - "learning_rate":{"_type":"choice","_value":[0.01, 0.05, 0.1, 0.2]}, - "bagging_fraction":{"_type":"uniform","_value":[0.7, 1.0]}, - "bagging_freq":{"_type":"choice","_value":[1, 2, 4, 8, 10]} -} \ No newline at end of file + "num_leaves": { + "_type": "randint", + "_value": [20, 31] + }, + "learning_rate": { + "_type": "choice", + "_value": [0.01, 0.05, 0.1, 0.2] + }, + "bagging_fraction": { + "_type": "uniform", + "_value": [0.7, 1.0] + }, + "bagging_freq": { + "_type": "choice", + "_value": [1, 2, 4, 8, 10] + } +} diff --git a/examples/trials/auto-gbdt/search_space_metis.json b/examples/trials/auto-gbdt/search_space_metis.json deleted file mode 100644 index 6bfbc32afa..0000000000 --- a/examples/trials/auto-gbdt/search_space_metis.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "num_leaves":{"_type":"choice","_value":[31, 28, 24, 20]}, - "learning_rate":{"_type":"choice","_value":[0.01, 0.05, 0.1, 0.2]}, - "bagging_freq":{"_type":"choice","_value":[1, 2, 4, 8, 10]} -} diff --git a/examples/trials/benchmarking/hyperband/config_hyperband.yml b/examples/trials/benchmarking/hyperband/config_hyperband.yml index a979bad44c..e79e3e0d8e 100644 --- a/examples/trials/benchmarking/hyperband/config_hyperband.yml +++ b/examples/trials/benchmarking/hyperband/config_hyperband.yml @@ -1,27 +1,20 @@ -authorName: default -experimentName: example_mnist_hyperband -trialConcurrency: 2 -maxExecDuration: 100h -maxTrialNum: 10000 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false advisor: - #choice: Hyperband, BOHB - builtinAdvisorName: Hyperband + name: Hyperband classArgs: + optimize_mode: maximize + #R: the maximum trial budget (could be the number of mini-batches or epochs) can be # allocated to a trial. Each trial should use trial budget to control how long it runs. R: 60 + #eta: proportion of discarded trials eta: 3 - #choice: maximize, minimize - optimize_mode: maximize + #choice: serial, parallelism exec_mode: serial -trial: - command: python3 main.py - codeDir: . 
- gpuNum: 0 + +searchSpaceFile: search_space.json +trialCommand: python3 main.py +trialConcurrency: 10 +trainingService: + platform: local diff --git a/examples/trials/cifar10_pytorch/config.yml b/examples/trials/cifar10_pytorch/config.yml index a44cfafa2b..b70083916e 100644 --- a/examples/trials/cifar10_pytorch/config.yml +++ b/examples/trials/cifar10_pytorch/config.yml @@ -1,23 +1,14 @@ -authorName: default -experimentName: example_pytorch_cifar10 +searchSpaceFile: search_space.json +trialCommand: python3 main.py +trialGpuNumber: 1 trialConcurrency: 4 -maxExecDuration: 100h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 main.py - codeDir: . - gpuNum: 1 -localConfig: - maxTrialNumPerGpu: 2 +trainingService: # For other platforms, check mnist-pytorch example + platform: local + maxTrialNumberPerGpu: 2 + useActiveGpu: false # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop) + # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu diff --git a/examples/trials/cifar10_pytorch/config_pai.yml b/examples/trials/cifar10_pytorch/config_pai.yml deleted file mode 100644 index 58f9bf5b51..0000000000 --- a/examples/trials/cifar10_pytorch/config_pai.yml +++ /dev/null @@ -1,35 +0,0 @@ -authorName: default -experimentName: example_pytorch_cifar10 -trialConcurrency: 1 -maxExecDuration: 100h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 main.py - codeDir: . 
- gpuNum: 1 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 diff --git a/examples/trials/cifar10_pytorch/search_space.json b/examples/trials/cifar10_pytorch/search_space.json index 723e49b6b9..562f041183 100644 --- a/examples/trials/cifar10_pytorch/search_space.json +++ b/examples/trials/cifar10_pytorch/search_space.json @@ -1,5 +1,14 @@ { - "lr":{"_type":"choice", "_value":[0.1, 0.01, 0.001, 0.0001]}, - "optimizer":{"_type":"choice", "_value":["SGD", "Adadelta", "Adagrad", "Adam", "Adamax"]}, - "model":{"_type":"choice", "_value":["vgg", "resnet18", "googlenet", "densenet121", "mobilenet", "dpn92", "senet18"]} + "lr": { + "_type": "choice", + "_value": [0.1, 0.01, 0.001, 0.0001] + }, + "optimizer": { + "_type": "choice", + "_value": ["SGD", "Adadelta", "Adagrad", "Adam", "Adamax"] + }, + "model": { + "_type": "choice", + "_value": ["vgg", "resnet18", "googlenet", "densenet121", "mobilenet", "dpn92", "senet18"] + } } diff --git a/examples/trials/efficientnet/config.yml b/examples/trials/efficientnet/config.yml new file mode 100644 index 0000000000..46f77b7de8 --- /dev/null +++ b/examples/trials/efficientnet/config.yml @@ -0,0 +1,15 @@ +searchSpaceFile: search_net.json +trialCodeDirectory: EfficientNet-PyTorch +trialCommand: python main.py /data/imagenet -j 12 -a efficientnet --batch-size 48 --lr 0.048 --wd 1e-5 --epochs 5 --request-from-nni +trialGpuNumber: 1 +trialConcurrency: 4 +maxTrialNumber: 100 +tuner: + className: tuner.FixedProductTuner + codeDirectory: . + classArgs: + product: 2 +trainingService: # For other platforms, check mnist-pytorch example + platform: local + useActiveGpu: false # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop) + # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegputrial: diff --git a/examples/trials/efficientnet/config_local.yml b/examples/trials/efficientnet/config_local.yml deleted file mode 100644 index bbb0978be2..0000000000 --- a/examples/trials/efficientnet/config_local.yml +++ /dev/null @@ -1,18 +0,0 @@ -authorName: unknown -experimentName: example_efficient_net -trialConcurrency: 4 -maxExecDuration: 99999d -maxTrialNum: 100 -trainingServicePlatform: local -searchSpacePath: search_net.json -useAnnotation: false -tuner: - codeDir: . - classFileName: tuner.py - className: FixedProductTuner - classArgs: - product: 2 -trial: - codeDir: EfficientNet-PyTorch - command: python main.py /data/imagenet -j 12 -a efficientnet --batch-size 48 --lr 0.048 --wd 1e-5 --epochs 5 --request-from-nni - gpuNum: 1 diff --git a/examples/trials/efficientnet/config_pai.yml b/examples/trials/efficientnet/config_pai.yml deleted file mode 100644 index d9c4d52450..0000000000 --- a/examples/trials/efficientnet/config_pai.yml +++ /dev/null @@ -1,31 +0,0 @@ -authorName: unknown -experimentName: example_efficient_net -trialConcurrency: 8 -maxExecDuration: 48h -maxTrialNum: 100 -trainingServicePlatform: pai -searchSpacePath: search_net.json -useAnnotation: false -tuner: - codeDir: . 
- classFileName: tuner.py - className: FixedProductTuner - classArgs: - product: 2 -trial: - codeDir: EfficientNet-PyTorch - command: sh train_imagenet.sh - cpuNum: 4 - memoryMB: 25000 - shmMB: 25000 - gpuNum: 1 - virtualCluster: nni - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -nniManagerIp: -paiConfig: - userName: - token: - host: diff --git a/examples/trials/efficientnet/tuner.py b/examples/trials/efficientnet/tuner.py index 7e5bc8b60c..1917fdcf11 100644 --- a/examples/trials/efficientnet/tuner.py +++ b/examples/trials/efficientnet/tuner.py @@ -1,4 +1,4 @@ -from nni.gridsearch_tuner.gridsearch_tuner import GridSearchTuner +from nni.algorithms.hpo.gridsearch_tuner import GridSearchTuner class FixedProductTuner(GridSearchTuner): diff --git a/examples/trials/ga_squad/config.yml b/examples/trials/ga_squad/config.yml index e276f0633c..dcf8c5e8ed 100644 --- a/examples/trials/ga_squad/config.yml +++ b/examples/trials/ga_squad/config.yml @@ -1,19 +1,13 @@ -authorName: default -experimentName: example_ga_squad +trialCommand: python3 trial.py +trialGpuNumber: 0 trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 +maxExperimentDuration: 1h +searchSpace: {} # hard-coded in tuner tuner: - codeDir: ../../tuners/ga_customer_tuner - classFileName: customer_tuner.py - className: CustomerTuner + className: customer_tuner.CustomerTuner + codeDirectory: ../../tuners/ga_customer_tuner classArgs: optimize_mode: maximize -trial: - command: python3 trial.py - codeDir: . - gpuNum: 0 +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/ga_squad/config_pai.yml b/examples/trials/ga_squad/config_pai.yml deleted file mode 100644 index 756acc2e2d..0000000000 --- a/examples/trials/ga_squad/config_pai.yml +++ /dev/null @@ -1,35 +0,0 @@ -authorName: default -experimentName: example_ga_squad -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -#choice: true, false -useAnnotation: false -#Your nni_manager ip -nniManagerIp: 10.10.10.10 -tuner: - codeDir: ../../tuners/ga_customer_tuner - classFileName: customer_tuner.py - className: CustomerTuner - classArgs: - optimize_mode: maximize -trial: - command: chmod +x ./download.sh && ./download.sh && python3 trial.py - codeDir: . 
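
# The efficientnet and ga_squad configs above load custom tuners through "className" plus
# "codeDirectory" (module.Class, resolved relative to codeDirectory). A minimal sketch of
# what such a class looks like, assuming the standard nni.tuner.Tuner interface; the class
# name and its naive sampling logic are illustrative, not the real FixedProductTuner or
# CustomerTuner from this repo.
from nni.tuner import Tuner

class MyTuner(Tuner):
    def __init__(self, optimize_mode='maximize'):              # classArgs arrive as keyword arguments
        self.optimize_mode = optimize_mode
        self.space = {}

    def update_search_space(self, search_space):                # called with the experiment's search space
        self.space = search_space

    def generate_parameters(self, parameter_id, **kwargs):      # one hyper-parameter set per trial
        return {name: spec['_value'][0] for name, spec in self.space.items()}  # naively pick the first candidate

    def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
        pass                                                    # update internal state with the reported metric
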
- gpuNum: 0 - cpuNum: 1 - memoryMB: 32869 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: /home/user/mnt - containerNFSMountPath: /mnt/data/user - paiStorageConfigName: confignfs-data -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 diff --git a/examples/trials/kaggle-tgs-salt/config.yml b/examples/trials/kaggle-tgs-salt/config.yml index 1a0db8a51f..d385a3fa4d 100644 --- a/examples/trials/kaggle-tgs-salt/config.yml +++ b/examples/trials/kaggle-tgs-salt/config.yml @@ -1,20 +1,11 @@ -authorName: default -experimentName: example_tgs -trialConcurrency: 2 -maxExecDuration: 10h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false useAnnotation: true +trialCommand: python3 train.py +trialGpuNumber: 0 +trialConcurrency: 2 +maxTrialNumber: 10 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 train.py - codeDir: . - gpuNum: 1 +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/mnist-advisor/config_bohb.yml b/examples/trials/mnist-advisor/config_bohb.yml index ca20ccea43..a7502ed6b9 100644 --- a/examples/trials/mnist-advisor/config_bohb.yml +++ b/examples/trials/mnist-advisor/config_bohb.yml @@ -1,23 +1,18 @@ -authorName: default -experimentName: example_mnist_bohb +# Run following command first to install dependencies of BOHB tuner: +# $ python3 -m pip install nni[BOHB] + +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 trialConcurrency: 1 -maxExecDuration: 10h -maxTrialNum: 1000 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxExperimentDuration: 10h +maxTrialNumber: 1000 advisor: - #choice: Hyperband, BOHB - #(BOHB should be installed through nnictl) - builtinAdvisorName: BOHB + name: BOHB classArgs: max_budget: 27 min_budget: 1 eta: 3 optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/mnist-advisor/config_hyperband.yml b/examples/trials/mnist-advisor/config_hyperband.yml index fd06a809f5..2cf2de9b32 100644 --- a/examples/trials/mnist-advisor/config_hyperband.yml +++ b/examples/trials/mnist-advisor/config_hyperband.yml @@ -1,27 +1,16 @@ -authorName: default -experimentName: example_mnist_hyperband +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 trialConcurrency: 2 -maxExecDuration: 100h -maxTrialNum: 10000 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxExperimentDuration: 100h +maxTrialNumber: 10000 advisor: - #choice: Hyperband, BOHB - builtinAdvisorName: Hyperband + name: Hyperband classArgs: - #R: the maximum trial budget (could be the number of mini-batches or epochs) can be - # allocated to a trial. Each trial should use trial budget to control how long it runs. 
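
# A sketch of how a trial consumes the budget mentioned in the comment above. With the
# Hyperband/BOHB advisors, the dict returned by nni.get_next_parameter() carries a
# TRIAL_BUDGET entry (in the unit chosen for R, e.g. epochs); the key name is taken from
# the Hyperband docs of this NNI version, and the metric below is a placeholder.
import nni

params = nni.get_next_parameter() or {}
budget = int(params.get('TRIAL_BUDGET', 1))               # how many epochs this trial may run

best = 0.0
for epoch in range(budget):
    # ... train one epoch and evaluate here ...
    best = max(best, 0.5 + 0.5 * (epoch + 1) / budget)    # placeholder accuracy
    nni.report_intermediate_result(best)
nni.report_final_result(best)
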
- R: 100 - #eta: proportion of discarded trials - eta: 3 - #choice: maximize, minimize - optimize_mode: maximize - #choice: serial, parallelism - exec_mode: parallelism -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 + R: 100 # the maximum trial budget (could be the number of mini-batches or epochs) can be + # allocated to a trial. Each trial should use trial budget to control how long it runs. + eta: 3 # proportion of discarded trials + optimize_mode: maximize # maximize or minimize + exec_mode: parallelism # serial or parallelism +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/mnist-advisor/config_pai.yml b/examples/trials/mnist-advisor/config_pai.yml deleted file mode 100644 index 3c122b6fb4..0000000000 --- a/examples/trials/mnist-advisor/config_pai.yml +++ /dev/null @@ -1,41 +0,0 @@ -authorName: default -experimentName: example_mnist_hyperband -maxExecDuration: 1h -maxTrialNum: 10000 -trialConcurrency: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -advisor: - #choice: Hyperband, BOHB - #(BOHB should be installed through nnictl) - builtinAdvisorName: Hyperband - classArgs: - #R: the maximum trial budget - R: 100 - #eta: proportion of discarded trials - eta: 3 - #choice: maximize, minimize - optimize_mode: maximize - #choice: serial, parallelism - exec_mode: parallelism -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: /home/user/mnt - containerNFSMountPath: /mnt/data/user - paiStorageConfigName: confignfs-data -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 diff --git a/examples/trials/mnist-annotation/config.yml b/examples/trials/mnist-annotation/config.yml index b724c7b609..937ec916de 100644 --- a/examples/trials/mnist-annotation/config.yml +++ b/examples/trials/mnist-annotation/config.yml @@ -1,20 +1,12 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false useAnnotation: true +trialCommand: python3 mnist.py +trialGpuNumber: 0 +trialConcurrency: 1 +maxTrialNumber: 10 +maxExperimentDuration: 1h tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . 
- gpuNum: 0 +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/mnist-annotation/config_gpu.yml b/examples/trials/mnist-annotation/config_gpu.yml deleted file mode 100644 index df8abd3a3b..0000000000 --- a/examples/trials/mnist-annotation/config_gpu.yml +++ /dev/null @@ -1,20 +0,0 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 4 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false -useAnnotation: true -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 1 diff --git a/examples/trials/mnist-annotation/config_kubeflow.yml b/examples/trials/mnist-annotation/config_kubeflow.yml deleted file mode 100644 index 37ff6f89a5..0000000000 --- a/examples/trials/mnist-annotation/config_kubeflow.yml +++ /dev/null @@ -1,31 +0,0 @@ -authorName: default -experimentName: example_dist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 1 -#choice: local, remote, pai, kubeflow -trainingServicePlatform: kubeflow -#choice: true, false -useAnnotation: true -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - codeDir: . - worker: - replicas: 1 - command: python3 mnist.py - gpuNum: 0 - cpuNum: 1 - memoryMB: 8192 - image: msranni/nni:latest -kubeflowConfig: - operator: tf-operator - apiVersion: v1alpha2 - storage: nfs - nfs: - server: 10.10.10.10 - path: /var/nfs/general \ No newline at end of file diff --git a/examples/trials/mnist-annotation/config_pai.yml b/examples/trials/mnist-annotation/config_pai.yml deleted file mode 100644 index ad27baae86..0000000000 --- a/examples/trials/mnist-annotation/config_pai.yml +++ /dev/null @@ -1,34 +0,0 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -#choice: true, false -useAnnotation: true -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . 
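
# The mnist-annotation configs keep "useAnnotation: true": instead of a search_space.json,
# hyper-parameters and metrics are declared with NNI annotations inside the trial code.
# A rough sketch of that style, assuming the annotation syntax from the NNI docs; the
# variable names and values here are illustrative, not taken from mnist.py.
"""@nni.variable(nni.choice(0.0001, 0.001, 0.01), name=learning_rate)"""
learning_rate = 0.001

"""@nni.variable(nni.uniform(0.5, 0.9), name=dropout_rate)"""
dropout_rate = 0.75

# ... build and train the model with these values ...
test_acc = 0.9   # placeholder for the real evaluation result
"""@nni.report_intermediate_result(test_acc)"""
"""@nni.report_final_result(test_acc)"""
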
- gpuNum: 0 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: /home/user/mnt - containerNFSMountPath: /mnt/data/user - paiStorageConfigName: confignfs-data -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/mnist-annotation/config_remote.yml b/examples/trials/mnist-annotation/config_remote.yml deleted file mode 100644 index 359b63044c..0000000000 --- a/examples/trials/mnist-annotation/config_remote.yml +++ /dev/null @@ -1,33 +0,0 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: remote -#choice: true, false -useAnnotation: true -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 -#machineList can be empty if the platform is local -machineList: - - ip: 10.1.1.1 - username: bob - passwd: bob123 - #port can be skip if using default ssh port 22 - #port: 22 - - ip: 10.1.1.2 - username: bob - passwd: bob123 - - ip: 10.1.1.3 - username: bob - passwd: bob123 diff --git a/examples/trials/mnist-batch-tune-keras/config.yml b/examples/trials/mnist-batch-tune-keras/config.yml index 92a87a4f7c..fae372a7aa 100644 --- a/examples/trials/mnist-batch-tune-keras/config.yml +++ b/examples/trials/mnist-batch-tune-keras/config.yml @@ -1,18 +1,10 @@ -authorName: default -experimentName: example_mnist-keras +searchSpaceFile: search_space.json +trialCommand: python3 mnist-keras.py +trialGpuNumber: 0 trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 +maxExperimentDuration: 1h tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: BatchTuner -trial: - command: python3 mnist-keras.py - codeDir: . - gpuNum: 0 + name: BatchTuner +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/mnist-batch-tune-keras/config_pai.yml b/examples/trials/mnist-batch-tune-keras/config_pai.yml deleted file mode 100644 index f3c7586e54..0000000000 --- a/examples/trials/mnist-batch-tune-keras/config_pai.yml +++ /dev/null @@ -1,32 +0,0 @@ -authorName: default -experimentName: example_mnist-keras -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: BatchTuner -trial: - command: python3 mnist-keras.py - codeDir: . 
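
# BatchTuner (used by mnist-batch-tune-keras above) takes no classArgs because it simply
# runs every configuration listed in the search space once. Its search space is expected to
# be a single "combine_params" choice whose candidates are complete configurations; the
# entries below are illustrative, not the search_space.json shipped in that folder.
batch_search_space = {
    'combine_params': {
        '_type': 'choice',
        '_value': [
            {'optimizer': 'Adam', 'learning_rate': 0.001},
            {'optimizer': 'Adam', 'learning_rate': 0.0001},
            {'optimizer': 'SGD', 'learning_rate': 0.01},
        ],
    },
}
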
- gpuNum: 0 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 diff --git a/examples/trials/mnist-distributed/config_kubeflow.yml b/examples/trials/mnist-distributed-tfv1/config_kubeflow.yml similarity index 100% rename from examples/trials/mnist-distributed/config_kubeflow.yml rename to examples/trials/mnist-distributed-tfv1/config_kubeflow.yml diff --git a/examples/trials/mnist-distributed/dist_mnist.py b/examples/trials/mnist-distributed-tfv1/dist_mnist.py similarity index 100% rename from examples/trials/mnist-distributed/dist_mnist.py rename to examples/trials/mnist-distributed-tfv1/dist_mnist.py diff --git a/examples/trials/mnist-distributed/search_space.json b/examples/trials/mnist-distributed-tfv1/search_space.json similarity index 100% rename from examples/trials/mnist-distributed/search_space.json rename to examples/trials/mnist-distributed-tfv1/search_space.json diff --git a/examples/trials/mnist-keras/config.yml b/examples/trials/mnist-keras/config.yml deleted file mode 100644 index f0f628779f..0000000000 --- a/examples/trials/mnist-keras/config.yml +++ /dev/null @@ -1,21 +0,0 @@ -authorName: default -experimentName: example_mnist-keras -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 mnist-keras.py - codeDir: . - gpuNum: 0 diff --git a/examples/trials/mnist-keras/config_pai.yml b/examples/trials/mnist-keras/config_pai.yml deleted file mode 100644 index 21e8e6030a..0000000000 --- a/examples/trials/mnist-keras/config_pai.yml +++ /dev/null @@ -1,35 +0,0 @@ -authorName: default -experimentName: example_mnist-keras -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 mnist-keras.py - codeDir: . 
- gpuNum: 0 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/mnist-keras/mnist-keras.py b/examples/trials/mnist-keras/mnist-keras.py deleted file mode 100644 index 794b7deb2a..0000000000 --- a/examples/trials/mnist-keras/mnist-keras.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright (c) Microsoft Corporation -# All rights reserved. -# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the "Software"), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -# to permit persons to whom the Software is furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING -# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- -import argparse -import logging - -import os -import keras -import numpy as np -from keras import backend as K -from keras.callbacks import TensorBoard -from keras.datasets import mnist -from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D -from keras.models import Sequential - -import nni - -LOG = logging.getLogger('mnist_keras') -K.set_image_data_format('channels_last') -TENSORBOARD_DIR = os.environ['NNI_OUTPUT_DIR'] - -H, W = 28, 28 -NUM_CLASSES = 10 - -def create_mnist_model(hyper_params, input_shape=(H, W, 1), num_classes=NUM_CLASSES): - ''' - Create simple convolutional model - ''' - layers = [ - Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape), - Conv2D(64, (3, 3), activation='relu'), - MaxPooling2D(pool_size=(2, 2)), - Flatten(), - Dense(100, activation='relu'), - Dense(num_classes, activation='softmax') - ] - - model = Sequential(layers) - - if hyper_params['optimizer'] == 'Adam': - optimizer = keras.optimizers.Adam(lr=hyper_params['learning_rate']) - else: - optimizer = keras.optimizers.SGD(lr=hyper_params['learning_rate'], momentum=0.9) - model.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer, metrics=['accuracy']) - - return model - -def load_mnist_data(args): - ''' - Load MNIST dataset - ''' - mnist_path = os.path.join(os.environ.get('NNI_OUTPUT_DIR'), 'mnist.npz') - (x_train, y_train), (x_test, y_test) = mnist.load_data(path=mnist_path) - os.remove(mnist_path) - - x_train = (np.expand_dims(x_train, -1).astype(np.float) / 255.)[:args.num_train] - x_test = (np.expand_dims(x_test, -1).astype(np.float) / 255.)[:args.num_test] - y_train = keras.utils.to_categorical(y_train, NUM_CLASSES)[:args.num_train] - y_test = keras.utils.to_categorical(y_test, NUM_CLASSES)[:args.num_test] - - LOG.debug('x_train shape: %s', (x_train.shape,)) - LOG.debug('x_test shape: %s', (x_test.shape,)) - - return x_train, y_train, x_test, y_test - -class SendMetrics(keras.callbacks.Callback): - ''' - Keras callback to send metrics to NNI framework - ''' - def on_epoch_end(self, epoch, logs={}): - ''' - Run on end of each epoch - ''' - LOG.debug(logs) - # TensorFlow 2.0 API reference claims the key is `val_acc`, but in fact it's `val_accuracy` - if 'val_acc' in logs: - nni.report_intermediate_result(logs['val_acc']) - else: - nni.report_intermediate_result(logs['val_accuracy']) - -def train(args, params): - ''' - Train model - ''' - x_train, y_train, x_test, y_test = load_mnist_data(args) - model = create_mnist_model(params) - - model.fit(x_train, y_train, batch_size=args.batch_size, epochs=args.epochs, verbose=1, - validation_data=(x_test, y_test), callbacks=[SendMetrics(), TensorBoard(log_dir=TENSORBOARD_DIR)]) - - _, acc = model.evaluate(x_test, y_test, verbose=0) - LOG.debug('Final result is: %d', acc) - nni.report_final_result(acc) - -def generate_default_params(): - ''' - Generate default hyper parameters - ''' - return { - 'optimizer': 'Adam', - 'learning_rate': 0.001 - } - -if __name__ == '__main__': - PARSER = argparse.ArgumentParser() - PARSER.add_argument("--batch_size", type=int, default=200, help="batch size", required=False) - PARSER.add_argument("--epochs", type=int, default=10, help="Train epochs", required=False) - PARSER.add_argument("--num_train", type=int, default=60000, help="Number of train samples to be used, maximum 60000", required=False) - PARSER.add_argument("--num_test", type=int, default=10000, help="Number of test samples to be used, maximum 10000", required=False) - - ARGS, UNKNOWN = PARSER.parse_known_args() - - 
try: - # get parameters from tuner - RECEIVED_PARAMS = nni.get_next_parameter() - LOG.debug(RECEIVED_PARAMS) - PARAMS = generate_default_params() - PARAMS.update(RECEIVED_PARAMS) - # train - train(ARGS, PARAMS) - except Exception as e: - LOG.exception(e) - raise diff --git a/examples/trials/mnist-keras/search_space.json b/examples/trials/mnist-keras/search_space.json deleted file mode 100644 index 774941a55e..0000000000 --- a/examples/trials/mnist-keras/search_space.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "optimizer":{"_type":"choice","_value":["Adam", "SGD"]}, - "learning_rate":{"_type":"choice","_value":[0.0001, 0.001, 0.002, 0.005, 0.01]} -} diff --git a/examples/trials/mnist-nested-search-space/config.yml b/examples/trials/mnist-nested-search-space/config.yml index 7c1715b97c..2cff01c655 100644 --- a/examples/trials/mnist-nested-search-space/config.yml +++ b/examples/trials/mnist-nested-search-space/config.yml @@ -1,20 +1,14 @@ -authorName: default -experimentName: mnist-nested-search-space +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 trialConcurrency: 2 -maxExecDuration: 1h -maxTrialNum: 100 -#choice: local, remote -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 100 +maxExperimentDuration: 1h tuner: - #choice: TPE, Random, Anneal, Evolution - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 +trainingService: # For other platforms, check mnist-pytorch example + platform: local + useActiveGpu: false # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop) + # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu diff --git a/examples/trials/mnist-nested-search-space/search_space.json b/examples/trials/mnist-nested-search-space/search_space.json index 4f35ddb354..185bc00253 100644 --- a/examples/trials/mnist-nested-search-space/search_space.json +++ b/examples/trials/mnist-nested-search-space/search_space.json @@ -1,114 +1,114 @@ { - "layer0": { - "_type": "choice", - "_value": [{ - "_name": "Empty" - }, - { - "_name": "Conv", - "kernel_size": { - "_type": "choice", - "_value": [1, 2, 3, 5] - } - }, - { - "_name": "Max_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - }, - { - "_name": "Avg_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - } - ] - }, - "layer1": { - "_type": "choice", - "_value": [{ - "_name": "Empty" - }, - { - "_name": "Conv", - "kernel_size": { - "_type": "choice", - "_value": [1, 2, 3, 5] - } - }, - { - "_name": "Max_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - }, - { - "_name": "Avg_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - } - ] - }, - "layer2": { - "_type": "choice", - "_value": [{ - "_name": "Empty" - }, - { - "_name": "Conv", - "kernel_size": { - "_type": "choice", - "_value": [1, 2, 3, 5] - } - }, - { - "_name": "Max_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - }, - { - "_name": "Avg_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - } - ] - }, - "layer3": { - "_type": "choice", - "_value": [{ - "_name": "Empty" - }, - { - "_name": "Conv", - "kernel_size": { - "_type": "choice", - "_value": [1, 2, 3, 5] - } - }, - { - "_name": "Max_pool", - "pooling_size": { - 
"_type": "choice", - "_value": [2, 3, 5] - } - }, - { - "_name": "Avg_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - } - ] - } -} \ No newline at end of file + "layer0": { + "_type": "choice", + "_value": [{ + "_name": "Empty" + }, + { + "_name": "Conv", + "kernel_size": { + "_type": "choice", + "_value": [1, 2, 3, 5] + } + }, + { + "_name": "Max_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + }, + { + "_name": "Avg_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + } + ] + }, + "layer1": { + "_type": "choice", + "_value": [{ + "_name": "Empty" + }, + { + "_name": "Conv", + "kernel_size": { + "_type": "choice", + "_value": [1, 2, 3, 5] + } + }, + { + "_name": "Max_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + }, + { + "_name": "Avg_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + } + ] + }, + "layer2": { + "_type": "choice", + "_value": [{ + "_name": "Empty" + }, + { + "_name": "Conv", + "kernel_size": { + "_type": "choice", + "_value": [1, 2, 3, 5] + } + }, + { + "_name": "Max_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + }, + { + "_name": "Avg_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + } + ] + }, + "layer3": { + "_type": "choice", + "_value": [{ + "_name": "Empty" + }, + { + "_name": "Conv", + "kernel_size": { + "_type": "choice", + "_value": [1, 2, 3, 5] + } + }, + { + "_name": "Max_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + }, + { + "_name": "Avg_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + } + ] + } +} diff --git a/examples/trials/mnist-pbt-tuner-pytorch/config.yml b/examples/trials/mnist-pbt-tuner-pytorch/config.yml index 508278e69f..8d648c8893 100644 --- a/examples/trials/mnist-pbt-tuner-pytorch/config.yml +++ b/examples/trials/mnist-pbt-tuner-pytorch/config.yml @@ -1,22 +1,14 @@ -authorName: default -experimentName: example_mnist_pbt_tuner_pytorch +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 1 trialConcurrency: 3 -maxExecDuration: 2h -maxTrialNum: 100 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 100 +maxExperimentDuration: 2h tuner: -# codeDir: ~/nni/src/sdk/pynni/nni/pbt_tuner -# classFileName: pbt_tuner.py -# className: PBTTuner - builtinTunerName: PBTTuner + name: PBTTuner classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 1 +trainingService: # For other platforms, check mnist-pytorch example + platform: local + useActiveGpu: false # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop) + # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu diff --git a/examples/trials/mnist-pytorch/config.yml b/examples/trials/mnist-pytorch/config.yml index 00a95216aa..9f7fac6279 100644 --- a/examples/trials/mnist-pytorch/config.yml +++ b/examples/trials/mnist-pytorch/config.yml @@ -1,21 +1,14 @@ -authorName: default -experimentName: example_mnist_pytorch +# This is the minimal config file for an NNI experiment. +# Use "nnictl create --config config.yml" to launch this experiment. +# Afterwards, you can check "config_detailed.yml" for more explaination. 
+
+searchSpaceFile: search_space.json
+trialCommand: python3 mnist.py  # NOTE: change "python3" to "python" if you are using Windows
+trialGpuNumber: 0
 trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: local
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
 tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
+  name: TPE
   classArgs:
-    #choice: maximize, minimize
     optimize_mode: maximize
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  gpuNum: 0
+trainingService:
+  platform: local
diff --git a/examples/trials/mnist-pytorch/config_aml.yml b/examples/trials/mnist-pytorch/config_aml.yml
index 8a5618606f..3c87d38123 100644
--- a/examples/trials/mnist-pytorch/config_aml.yml
+++ b/examples/trials/mnist-pytorch/config_aml.yml
@@ -1,25 +1,15 @@
-authorName: default
-experimentName: example_mnist_pytorch
+searchSpaceFile: search_space.json
+trialCommand: python3 mnist.py
 trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-trainingServicePlatform: aml
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
+maxTrialNumber: 10
 tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
+  name: TPE
   classArgs:
-    #choice: maximize, minimize
     optimize_mode: maximize
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  image: msranni/nni
-amlConfig:
-  subscriptionId: ${replace_to_your_subscriptionId}
-  resourceGroup: ${replace_to_your_resourceGroup}
-  workspaceName: ${replace_to_your_workspaceName}
-  computeTarget: ${replace_to_your_computeTarget}
+trainingService:
+  platform: aml
+  dockerImage: msranni/nni
+  subscriptionId: ${your subscription ID}
+  resourceGroup: ${your resource group}
+  workspaceName: ${your workspace name}
+  computeTarget: ${your compute target}
diff --git a/examples/trials/mnist-pytorch/config_detailed.yml b/examples/trials/mnist-pytorch/config_detailed.yml
new file mode 100644
index 0000000000..4f88ed28fe
--- /dev/null
+++ b/examples/trials/mnist-pytorch/config_detailed.yml
@@ -0,0 +1,42 @@
+# This example shows more configurable fields comparing to the minimal "config.yml"
+# You can use "nnictl create --config config_detailed.yml" to launch this experiment.
+# If you see an error message saying "port 8080 is used", use "nnictl stop --all" to stop previous experiment.
+
+# Hyper-parameter search space can either be configured here or in a separate file.
+# "config.yml" shows how to specify a separate search space file.
+# The common schema of search space is documented here:
+# https://nni.readthedocs.io/en/stable/Tutorial/SearchSpaceSpec.html
+searchSpace:
+  batch_size:
+    _type: choice
+    _value: [16, 32, 64, 128]
+  hidden_size:
+    _type: choice
+    _value: [128, 256, 512, 1024]
+  lr:
+    _type: choice
+    _value: [0.0001, 0.001, 0.01, 0.1]
+  momentum:
+    _type: uniform
+    _value: [0, 1]
+
+trialCommand: python3 mnist.py   # The command to launch a trial. NOTE: change "python3" to "python" if you are using Windows.
+trialCodeDirectory: .            # The path of trial code. By default it's ".", which means the same directory of this config file.
+trialGpuNumber: 1                # How many GPUs should each trial use. CUDA is required when it's greater than zero.
+
+trialConcurrency: 4              # Run 4 trials concurrently.
+maxTrialNumber: 10               # Generate at most 10 trials.
+maxExperimentDuration: 1h       # Stop generating trials after 1 hour.
+
+tuner:                           # Configure the tuning algorithm.
+  name: TPE                      # Supported algorithms: TPE, Random, Anneal, Evolution, GridSearch, GPTuner, PBTTuner, etc.
+                                 # Full list: https://nni.readthedocs.io/en/latest/Tuner/BuiltinTuner.html
+  classArgs:                     # Algorithm specific arguments. See the tuner's doc for details.
+    optimize_mode: maximize      # "minimize" or "maximize"
+
+# Configure the training platform.
+# Supported platforms: local, remote, openpai, aml, kubeflow, kubernetes, adl.
+trainingService:
+  platform: local
+  useActiveGpu: false            # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop)
+                                 # Reason and details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu
diff --git a/examples/trials/mnist-pytorch/config_openpai.yml b/examples/trials/mnist-pytorch/config_openpai.yml
new file mode 100644
index 0000000000..01f90ecfae
--- /dev/null
+++ b/examples/trials/mnist-pytorch/config_openpai.yml
@@ -0,0 +1,20 @@
+searchSpaceFile: search_space.json
+trialCommand: python3 mnist.py
+trialGpuNumber: 0
+trialConcurrency: 1
+maxTrialNumber: 10
+tuner:
+  name: TPE
+  classArgs:
+    optimize_mode: maximize
+trainingService:
+  platform: openpai
+  host: http://123.123.123.123
+  username: ${your user name}
+  token: ${your token}
+  dockerImage: msranni/nni
+  trialCpuNumber: 1
+  trialMemorySize: 8GB
+  storageConfigName: ${your storage config name}
+  localStorageMountPoint: ${NFS mount point on local machine}
+  containerStorageMountPoint: ${NFS mount point inside Docker container}
diff --git a/examples/trials/mnist-pytorch/config_pai.yml b/examples/trials/mnist-pytorch/config_pai.yml
deleted file mode 100644
index f821fa3ba2..0000000000
--- a/examples/trials/mnist-pytorch/config_pai.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-authorName: default
-experimentName: example_mnist_pytorch
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: pai
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-  nniManagerNFSMountPath: {replace_to_your_nfs_mount_path}
-  containerNFSMountPath: {replace_to_your_container_mount_path}
-  paiStorageConfigName: {replace_to_your_storage_config_name}
-paiConfig:
-  #The username to login pai
-  userName: username
-  #The token to login pai
-  token: token
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file
diff --git a/examples/trials/mnist-pytorch/config_remote.yml b/examples/trials/mnist-pytorch/config_remote.yml
new file mode 100644
index 0000000000..42a8546848
--- /dev/null
+++ b/examples/trials/mnist-pytorch/config_remote.yml
@@ -0,0 +1,24 @@
+searchSpaceFile: search_space.json
+trialCommand: python3 mnist.py
+trialGpuNumber: 0
+trialConcurrency: 4
+maxTrialNumber: 20
+tuner:
+  name: TPE
+  classArgs:
+    optimize_mode: maximize
+trainingService:
+  platform: remote
+  machineList:
+    - host: ${your server's IP or domain name}
+      user: ${your user name}
+      ssh_key_file: ~/.ssh/id_rsa  # We recommend public key over password; it's more secure and convenient.
+ # You can specify more than one SSH servers: + - host: 123.123.123.123 + port: 10022 + user: nniuser + password: 12345 + pythonPath: /usr/bin # Other examples: + # /opt/python3.9/bin + # C:/Python39 + # C:/Users/USERNAME/.conda/envs/ENVNAME;C:/Users/USERNAME/.conda/envs/ENVNAME/Scripts;C:/Users/USERNAME/.conda/envs/ENVNAME/Library/bin diff --git a/examples/trials/mnist-pytorch/config_tensorboard.yml b/examples/trials/mnist-pytorch/config_tensorboard.yml index c067cd16c4..9c8839bca2 100644 --- a/examples/trials/mnist-pytorch/config_tensorboard.yml +++ b/examples/trials/mnist-pytorch/config_tensorboard.yml @@ -1,21 +1,11 @@ -authorName: default -experimentName: example_mnist_pytorch +searchSpaceFile: search_space.json +trialCommand: python3 mnist_tensorboard.py # NOTE: change "python3" to "python" if you are using Windows +trialGpuNumber: 0 trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 mnist_tensorboard.py - codeDir: . - gpuNum: 0 +trainingService: + platform: local diff --git a/examples/trials/mnist-pytorch/config_v2.yml b/examples/trials/mnist-pytorch/config_v2.yml deleted file mode 100644 index 370f22a7ea..0000000000 --- a/examples/trials/mnist-pytorch/config_v2.yml +++ /dev/null @@ -1,23 +0,0 @@ -searchSpace: - momentum: - _type: uniform - _value: [0, 1] - hidden_size: - _type: choice - _value: [128, 256, 512, 1024] - batch_size: - _type: choice - _value: [16, 32, 64, 128] - lr: - _type: choice - _value: [0.0001, 0.001, 0.01, 0.1] -trainingService: - platform: local -trialCodeDirectory: . -trialCommand: python3 mnist.py -trialConcurrency: 1 -trialGpuNumber: 0 -tuner: - name: TPE - classArgs: - optimize_mode: maximize diff --git a/examples/trials/mnist-pytorch/config_windows.yml b/examples/trials/mnist-pytorch/config_windows.yml deleted file mode 100644 index ae27d4517b..0000000000 --- a/examples/trials/mnist-pytorch/config_windows.yml +++ /dev/null @@ -1,21 +0,0 @@ -authorName: default -experimentName: example_mnist_pytorch -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python mnist.py - codeDir: . 
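
# config_tensorboard.yml above runs mnist_tensorboard.py. The usual convention (also visible
# in the removed mnist-keras.py) is to write logs under NNI_OUTPUT_DIR so NNI's TensorBoard
# integration can locate them; the "tensorboard" sub-directory and the metric below are
# assumptions based on the NNI docs, not code copied from mnist_tensorboard.py.
import os
from torch.utils.tensorboard import SummaryWriter

log_dir = os.path.join(os.environ.get('NNI_OUTPUT_DIR', '.'), 'tensorboard')
writer = SummaryWriter(log_dir)
for step in range(10):
    writer.add_scalar('val_accuracy', step / 10, step)   # replace with real metrics
writer.close()
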
- gpuNum: 0 diff --git a/examples/trials/mnist-sharedstorage/config_nfs.yml b/examples/trials/mnist-sharedstorage/config_nfs.yml index 0a2872c920..2b85f10224 100644 --- a/examples/trials/mnist-sharedstorage/config_nfs.yml +++ b/examples/trials/mnist-sharedstorage/config_nfs.yml @@ -32,4 +32,4 @@ sharedStorage: # usermount means you have already mount this storage on localMountPoint # nnimount means nni will try to mount this storage on localMountPoint # nomount means storage will not mount in local machine, will support partial storages in the future - localMounted: nnimount \ No newline at end of file + localMounted: nnimount diff --git a/examples/trials/mnist-tfv2/config.yml b/examples/trials/mnist-tfv2/config.yml index 06e9af6be3..9f7fac6279 100644 --- a/examples/trials/mnist-tfv2/config.yml +++ b/examples/trials/mnist-tfv2/config.yml @@ -1,17 +1,14 @@ -authorName: NNI Example -experimentName: MNIST TF v2.x +# This is the minimal config file for an NNI experiment. +# Use "nnictl create --config config.yml" to launch this experiment. +# Afterwards, you can check "config_detailed.yml" for more explaination. + +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py # NOTE: change "python3" to "python" if you are using Windows +trialGpuNumber: 0 trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -trainingServicePlatform: local # choices: local, remote, pai -searchSpacePath: search_space.json -useAnnotation: false tuner: - builtinTunerName: TPE # choices: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, - # GPTuner, SMAC (SMAC should be installed through nnictl) - classArgs: - optimize_mode: maximize # choices: maximize, minimize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 + name: TPE + classArgs: + optimize_mode: maximize +trainingService: + platform: local diff --git a/examples/trials/mnist-tfv2/config_assessor.yml b/examples/trials/mnist-tfv2/config_assessor.yml index be9ec740ab..1a138d8e17 100644 --- a/examples/trials/mnist-tfv2/config_assessor.yml +++ b/examples/trials/mnist-tfv2/config_assessor.yml @@ -1,27 +1,16 @@ -authorName: NNI Example -experimentName: MNIST TF v2.x with assessor -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 50 -#choice: local, remote -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 +trialConcurrency: 2 +maxTrialNumber: 50 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -assessor: - #choice: Medianstop, Curvefitting - builtinAssessorName: Curvefitting +assessor: # Specify early-stop algorithm + name: Curvefitting classArgs: epoch_num: 20 threshold: 0.9 -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 +trainingService: + platform: local diff --git a/examples/trials/mnist-tfv2/config_detailed.yml b/examples/trials/mnist-tfv2/config_detailed.yml new file mode 100644 index 0000000000..17da508b84 --- /dev/null +++ b/examples/trials/mnist-tfv2/config_detailed.yml @@ -0,0 +1,46 @@ +# This example shows more configurable fields comparing to the minimal "config.yml" +# You can use "nnictl create --config config_detailed.yml" to launch this experiment. +# If you see an error message saying "port 8080 is used", use "nnictl stop --all" to stop previous experiment. 
+
+# Hyper-parameter search space can either be configured here or in a seperate file.
+# "config.yml" shows how to specify a seperate search space file.
+# The common schema of search space is documented here:
+# https://nni.readthedocs.io/en/stable/Tutorial/SearchSpaceSpec.html
+searchSpace:
+  dropout_rate:
+    _type: uniform
+    _value: [0.5, 0.9]
+  conv_size:
+    _type: choice
+    _value: [2, 3, 5, 7]
+  hidden_size:
+    _type: choice
+    _value: [128, 512, 1024]
+  batch_size:
+    _type: choice
+    _value: [16, 32]
+  learning_rate:
+    _type: choice
+    _value: [0.0001, 0.001, 0.01, 0.1]
+
+trialCommand: python3 mnist.py  # The command to launch a trial. NOTE: change "python3" to "python" if you are using Windows.
+trialCodeDirectory: .  # The path of trial code. By default it's ".", which means the same directory of this config file.
+trialGpuNumber: 1  # How many GPUs should each trial use. CUDA is required when it's greator than zero.
+
+trialConcurrency: 4  # Run 4 trials concurrently.
+maxTrialNumber: 10  # Generate at most 10 trials.
+maxExperimentDuration: 1h  # Stop generating trials after 1 hour.
+
+tuner:  # Configure the tuning algorithm.
+  name: TPE  # Supported algorithms: TPE, Random, Anneal, Evolution, GridSearch, GPTuner, PBTTuner, etc.
+             # Full list: https://nni.readthedocs.io/en/latest/Tuner/BuiltinTuner.html
+  classArgs:  # Algorithm specific arguments. See the tuner's doc for details.
+    optimize_mode: maximize  # "minimize" or "maximize"
+
+# Configure the training platform.
+# Supported platforms: local, remote, openpai, aml, kubeflow, kubernetes, adl.
+# You can find config template of some platforms in this directory, and others in mnist-pytorch example.
+trainingService:
+  platform: local
+  useActiveGpu: false  # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop)
+                       # Reason and details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu
diff --git a/examples/trials/mnist-tfv2/config_remote.yml b/examples/trials/mnist-tfv2/config_remote.yml
index 09fb8634fe..42a8546848 100644
--- a/examples/trials/mnist-tfv2/config_remote.yml
+++ b/examples/trials/mnist-tfv2/config_remote.yml
@@ -1,32 +1,24 @@
-authorName: default
-experimentName: example_mnist
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: remote
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
+searchSpaceFile: search_space.json
+trialCommand: python3 mnist.py
+trialGpuNumber: 0
+trialConcurrency: 4
+maxTrialNumber: 20
 tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
+  name: TPE
   classArgs:
-    #choice: maximize, minimize
     optimize_mode: maximize
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  gpuNum: 0
-#machineList can be empty if the platform is local
-machineList:
-  - ip: ${replace_to_your_remote_machine_ip}
-    username: ${replace_to_your_remote_machine_username}
-    sshKeyPath: ${replace_to_your_remote_machine_sshKeyPath}
-    # Below are examples of specifying python environment.
-    # pythonPath: /opt/python3.7/bin
-    # pythonPath: C:/Python37
-    # Below is an example of specifying python environment for windows anaconda user. Multiple paths separated by ';'.
-    # pythonPath: C:/Users/yourname/.conda/envs/myenv;C:/Users/yourname/.conda/envs/myenv/Scripts;C:/Users/yourname/.conda/envs/myenv/Library/bin
-    pythonPath: ${replace_to_python_environment_path_in_your_remote_machine}
+trainingService:
+  platform: remote
+  machineList:
+    - host: ${your server's IP or domain name}
+      user: ${your user name}
+      ssh_key_file: ~/.ssh/id_rsa  # We recommend public key over password; it is more secure and convenient.
+      # You can specify more than one SSH server:
+    - host: 123.123.123.123
+      port: 10022
+      user: nniuser
+      password: 12345
+      pythonPath: /usr/bin  # Other examples:
+                            # /opt/python3.9/bin
+                            # C:/Python39
+                            # C:/Users/USERNAME/.conda/envs/ENVNAME;C:/Users/USERNAME/.conda/envs/ENVNAME/Scripts;C:/Users/USERNAME/.conda/envs/ENVNAME/Library/bin
diff --git a/examples/trials/mnist-tfv2/config_v2.yml b/examples/trials/mnist-tfv2/config_v2.yml
deleted file mode 100644
index 64ba5c7ec8..0000000000
--- a/examples/trials/mnist-tfv2/config_v2.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-searchSpace:
-  dropout_rate:
-    _type: uniform
-    _value: [0.5, 0.9]
-  conv_size:
-    _type: choice
-    _value: [2, 3, 5, 7]
-  hidden_size:
-    _type: choice
-    _value: [128, 512, 1024]
-  batch_size:
-    _type: choice
-    _value: [16, 32]
-  learning_rate:
-    _type: choice
-    _value: [0.0001, 0.001, 0.01, 0.1]
-trainingService:
-  platform: local
-trialCodeDirectory: .
-trialCommand: python3 mnist.py
-trialConcurrency: 1
-trialGpuNumber: 0
-tuner:
-  name: TPE
-  classArgs:
-    optimize_mode: maximize
diff --git a/examples/trials/mnist-tfv2/config_windows.yml b/examples/trials/mnist-tfv2/config_windows.yml
deleted file mode 100644
index f1c12aa135..0000000000
--- a/examples/trials/mnist-tfv2/config_windows.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-authorName: NNI Example
-experimentName: MNIST TF v2.x
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: local
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python mnist.py
-  codeDir: .
- gpuNum: 0 diff --git a/examples/trials/network_morphism/FashionMNIST/config.yml b/examples/trials/network_morphism/FashionMNIST/config.yml index f88f7dc57a..70a6d720a9 100644 --- a/examples/trials/network_morphism/FashionMNIST/config.yml +++ b/examples/trials/network_morphism/FashionMNIST/config.yml @@ -1,29 +1,18 @@ -authorName: default -experimentName: example_FashionMNIST-network-morphism +trialCommand: python3 FashionMNIST_keras.py +trialGpuNumber: 1 trialConcurrency: 4 -maxExecDuration: 48h -maxTrialNum: 200 -#choice: local, remote, pai -trainingServicePlatform: local -#searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxExperimentDuration: 48h +maxTrialNumber: 200 +searchSpace: {} # search space of NetworkMorphism is provided via classArgs tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: NetworkMorphism + name: NetworkMorphism classArgs: - #choice: maximize, minimize - optimize_mode: maximize - #for now, this tuner only supports cv domain - task: cv - #input image width - input_width: 28 - #input image channel - input_channel: 1 - #number of classes - n_output_node: 10 -trial: - command: python3 FashionMNIST_keras.py - codeDir: . - gpuNum: 1 + optimize_mode: maximize # maximize or minimize + task: cv # for now, this tuner only supports cv domain + input_width: 28 # input image width + input_channel: 1 # input image channel + n_output_node: 10 # number of classes +trainingService: + platform: local + useActiveGpu: false # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop) + # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu diff --git a/examples/trials/network_morphism/FashionMNIST/config_pai.yml b/examples/trials/network_morphism/FashionMNIST/config_pai.yml deleted file mode 100644 index 935180163c..0000000000 --- a/examples/trials/network_morphism/FashionMNIST/config_pai.yml +++ /dev/null @@ -1,42 +0,0 @@ -authorName: default -experimentName: example_FashionMNIST-network-morphism -trialConcurrency: 1 -maxExecDuration: 24h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: NetworkMorphism - classArgs: - #choice: maximize, minimize - optimize_mode: maximize - # for now, this tuner only supports cv domain - task: cv - #input image width - input_width: 28 - #input image channel - input_channel: 1 - #number of classes - n_output_node: 10 -trial: - command: python3 FashionMNIST_keras.py - codeDir: . 
- gpuNum: 1 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/network_morphism/cifar10/config.yml b/examples/trials/network_morphism/cifar10/config.yml index 35a96bb41c..0e6cc00b6f 100644 --- a/examples/trials/network_morphism/cifar10/config.yml +++ b/examples/trials/network_morphism/cifar10/config.yml @@ -1,29 +1,18 @@ -authorName: default -experimentName: example_cifar10-network-morphism +trialCommand: python3 cifar10_keras.py +trialGpuNumber: 1 trialConcurrency: 4 -maxExecDuration: 48h -maxTrialNum: 200 -#choice: local, remote, pai -trainingServicePlatform: local -#searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxExperimentDuration: 48h +maxTrialNumber: 200 +searchSpace: {} # search space of NetworkMorphism is provided via classArgs tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: NetworkMorphism + name: NetworkMorphism classArgs: - #choice: maximize, minimize - optimize_mode: maximize - #for now, this tuner only supports cv domain - task: cv - #input image width - input_width: 32 - #input image channel - input_channel: 3 - #number of classes - n_output_node: 10 -trial: - command: python3 cifar10_keras.py - codeDir: . - gpuNum: 1 + optimize_mode: maximize # maximize or minimize + task: cv # for now, this tuner only supports cv domain + input_width: 32 # input image width + input_channel: 3 # input image channel + n_output_node: 10 # number of classes +trainingService: + platform: local + useActiveGpu: false # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop) + # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu diff --git a/examples/trials/network_morphism/cifar10/config_pai.yml b/examples/trials/network_morphism/cifar10/config_pai.yml deleted file mode 100644 index a377cc004a..0000000000 --- a/examples/trials/network_morphism/cifar10/config_pai.yml +++ /dev/null @@ -1,42 +0,0 @@ -authorName: default -experimentName: example_cifar10-network-morphism -trialConcurrency: 1 -maxExecDuration: 24h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: NetworkMorphism - classArgs: - #choice: maximize, minimize - optimize_mode: maximize - # for now, this tuner only supports cv domain - task: cv - #input image width - input_width: 32 - #input image channel - input_channel: 3 - #number of classes - n_output_node: 10 -trial: - command: python3 cifar10_keras.py - codeDir: . 
- gpuNum: 1 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/sklearn/classification/config.yml b/examples/trials/sklearn/classification/config.yml index 2071ee0385..a1e6bec0f6 100644 --- a/examples/trials/sklearn/classification/config.yml +++ b/examples/trials/sklearn/classification/config.yml @@ -1,20 +1,11 @@ -authorName: default -experimentName: example_sklearn-classification +searchSpaceFile: search_space.json +trialCommand: python3 main.py trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 100 -#choice: local, remote -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 100 +maxExperimentDuration: 1h tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 main.py - codeDir: . - gpuNum: 0 \ No newline at end of file +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/sklearn/classification/config_pai.yml b/examples/trials/sklearn/classification/config_pai.yml deleted file mode 100644 index 764cfbedae..0000000000 --- a/examples/trials/sklearn/classification/config_pai.yml +++ /dev/null @@ -1,35 +0,0 @@ -authorName: default -experimentName: example_sklearn -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 100 -#choice: local, remote, pai -trainingServicePlatform: pai -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner,MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 main.py - codeDir: . 
- gpuNum: 0 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/sklearn/regression/config.yml b/examples/trials/sklearn/regression/config.yml index 35f0c5d617..c3fcf52c3e 100644 --- a/examples/trials/sklearn/regression/config.yml +++ b/examples/trials/sklearn/regression/config.yml @@ -1,20 +1,11 @@ -authorName: default -experimentName: example_sklearn-regression +searchSpaceFile: search_space.json +trialCommand: python3 main.py trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 30 -#choice: local, remote -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 30 +maxExperimentDuration: 1h tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 main.py - codeDir: . - gpuNum: 0 \ No newline at end of file +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/sklearn/regression/config_pai.yml b/examples/trials/sklearn/regression/config_pai.yml deleted file mode 100644 index 89c6d49c42..0000000000 --- a/examples/trials/sklearn/regression/config_pai.yml +++ /dev/null @@ -1,35 +0,0 @@ -authorName: default -experimentName: example_sklearn -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 100 -#choice: local, remote, pai -trainingServicePlatform: pai -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 main.py - codeDir: . 
- gpuNum: 0 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 \ No newline at end of file diff --git a/nni/experiment/config/base.py b/nni/experiment/config/base.py index ac7b6024c1..d1fc92a26f 100644 --- a/nni/experiment/config/base.py +++ b/nni/experiment/config/base.py @@ -124,7 +124,7 @@ def validate(self) -> None: type_name = str(field.type).replace('typing.', '') optional = any([ type_name.startswith('Optional['), - type_name.startswith('Union[') and 'NoneType' in type_name, + type_name.startswith('Union[') and 'None' in type_name, type_name == 'Any' ]) if value is None: diff --git a/nni/experiment/config/common.py b/nni/experiment/config/common.py index 3937c16bbf..c7cd64a7da 100644 --- a/nni/experiment/config/common.py +++ b/nni/experiment/config/common.py @@ -29,6 +29,8 @@ def validate(self): super().validate() _validate_algo(self) + _canonical_rules = {'code_directory': util.canonical_path} + @dataclass(init=False) class AlgorithmConfig(_AlgorithmConfig): name: str @@ -37,7 +39,7 @@ class AlgorithmConfig(_AlgorithmConfig): @dataclass(init=False) class CustomAlgorithmConfig(_AlgorithmConfig): class_name: str - class_directory: Optional[PathLike] = '.' + code_directory: Optional[PathLike] = '.' class_args: Optional[Dict[str, Any]] = None @@ -67,7 +69,7 @@ class ExperimentConfig(ConfigBase): debug: bool = False log_level: Optional[str] = None experiment_working_directory: PathLike = '~/nni-experiments' - tuner_gpu_indices: Optional[Union[List[int], str]] = None + tuner_gpu_indices: Union[List[int], str, int, None] = None tuner: Optional[_AlgorithmConfig] = None assessor: Optional[_AlgorithmConfig] = None advisor: Optional[_AlgorithmConfig] = None @@ -137,7 +139,7 @@ def _validation_rules(self): 'trial_code_directory': util.canonical_path, 'max_experiment_duration': lambda value: f'{util.parse_time(value)}s' if value is not None else None, 'experiment_working_directory': util.canonical_path, - 'tuner_gpu_indices': lambda value: [int(idx) for idx in value.split(',')] if isinstance(value, str) else value, + 'tuner_gpu_indices': util.canonical_gpu_indices, 'tuner': lambda config: None if config is None or config.name == '_none_' else config.canonical(), 'assessor': lambda config: None if config is None or config.name == '_none_' else config.canonical(), 'advisor': lambda config: None if config is None or config.name == '_none_' else config.canonical(), diff --git a/nni/experiment/config/local.py b/nni/experiment/config/local.py index 01654c7231..90b92093fd 100644 --- a/nni/experiment/config/local.py +++ b/nni/experiment/config/local.py @@ -5,6 +5,7 @@ from typing import List, Optional, Union from .common import TrainingServiceConfig +from . 
import util __all__ = ['LocalConfig'] @@ -13,10 +14,10 @@ class LocalConfig(TrainingServiceConfig): platform: str = 'local' use_active_gpu: Optional[bool] = None max_trial_number_per_gpu: int = 1 - gpu_indices: Optional[Union[List[int], str]] = None + gpu_indices: Union[List[int], str, int, None] = None _canonical_rules = { - 'gpu_indices': lambda value: [int(idx) for idx in value.split(',')] if isinstance(value, str) else value + 'gpu_indices': util.canonical_gpu_indices } _validation_rules = { diff --git a/nni/experiment/config/remote.py b/nni/experiment/config/remote.py index d2ee34eff5..29c47fcaa8 100644 --- a/nni/experiment/config/remote.py +++ b/nni/experiment/config/remote.py @@ -22,12 +22,12 @@ class RemoteMachineConfig(ConfigBase): ssh_passphrase: Optional[str] = None use_active_gpu: bool = False max_trial_number_per_gpu: int = 1 - gpu_indices: Optional[Union[List[int], str]] = None + gpu_indices: Union[List[int], str, int, None] = None python_path: Optional[str] = None _canonical_rules = { 'ssh_key_file': util.canonical_path, - 'gpu_indices': lambda value: [int(idx) for idx in value.split(',')] if isinstance(value, str) else value, + 'gpu_indices': util.canonical_gpu_indices } _validation_rules = { diff --git a/nni/experiment/config/util.py b/nni/experiment/config/util.py index fa81aedeaa..62a56f6b00 100644 --- a/nni/experiment/config/util.py +++ b/nni/experiment/config/util.py @@ -92,3 +92,10 @@ def _parse_unit(string, target_unit, all_units): value = float(number) * factor return math.ceil(value / all_units[target_unit]) raise ValueError(f'Unsupported unit in "{string}"') + +def canonical_gpu_indices(indices: Union[List[int], str, int, None]) -> Optional[List[int]]: + if isinstance(indices, str): + return [int(idx) for idx in indices.split(',')] + if isinstance(indices, int): + return [indices] + return indices diff --git a/nni/tools/nnictl/launcher.py b/nni/tools/nnictl/launcher.py index 16e88307e5..3d4b82ee3b 100644 --- a/nni/tools/nnictl/launcher.py +++ b/nni/tools/nnictl/launcher.py @@ -403,9 +403,13 @@ def launch_experiment(args, experiment_config, mode, experiment_id, config_versi if not os.path.isdir(path): os.makedirs(path) path = tempfile.mkdtemp(dir=path) - nas_mode = experiment_config['trial'].get('nasMode', 'classic_mode') - code_dir = expand_annotations(experiment_config['trial']['codeDir'], path, nas_mode=nas_mode) - experiment_config['trial']['codeDir'] = code_dir + if config_version == 1: + nas_mode = experiment_config['trial'].get('nasMode', 'classic_mode') + code_dir = expand_annotations(experiment_config['trial']['codeDir'], path, nas_mode=nas_mode) + experiment_config['trial']['codeDir'] = code_dir + else: + code_dir = expand_annotations(experiment_config['trialCodeDirectory'], path) + experiment_config['trialCodeDirectory'] = code_dir search_space = generate_search_space(code_dir) experiment_config['searchSpace'] = search_space assert search_space, ERROR_INFO % 'Generated search space is empty' @@ -513,17 +517,17 @@ def create_experiment(args): config_v2 = _validate_v2(config_yml, config_path) schema = 2 - try: - if schema == 1: - launch_experiment(args, config_v1, 'new', experiment_id, 1) - else: - launch_experiment(args, config_v2, 'new', experiment_id, 2) - except Exception as exception: - restServerPid = Experiments().get_all_experiments().get(experiment_id, {}).get('pid') - if restServerPid: - kill_command(restServerPid) - print_error(exception) - exit(1) + #try: + if schema == 1: + launch_experiment(args, config_v1, 'new', experiment_id, 1) + else: + 
launch_experiment(args, config_v2, 'new', experiment_id, 2) + #except Exception as exception: + # restServerPid = Experiments().get_all_experiments().get(experiment_id, {}).get('pid') + # if restServerPid: + # kill_command(restServerPid) + # print_error(exception) + # exit(1) def manage_stopped_experiment(args, mode): '''view a stopped experiment''' diff --git a/nni/tools/package_utils/__init__.py b/nni/tools/package_utils/__init__.py index 9a86cbe75e..7f8f441965 100644 --- a/nni/tools/package_utils/__init__.py +++ b/nni/tools/package_utils/__init__.py @@ -187,7 +187,7 @@ def create_customized_class_instance(class_params): Returns customized class instance. """ - code_dir = class_params.get('classDirectory') + code_dir = class_params.get('codeDirectory') qualified_class_name = class_params.get('className') class_args = class_params.get('classArgs') From 357fa15f432c708371b60bac6c63a01aa22f4012 Mon Sep 17 00:00:00 2001 From: liuzhe Date: Thu, 3 Jun 2021 11:56:47 +0800 Subject: [PATCH 2/7] add removed example back --- examples/trials/mnist-keras/config.yml | 21 +++ examples/trials/mnist-keras/config_pai.yml | 35 +++++ examples/trials/mnist-keras/mnist-keras.py | 137 ++++++++++++++++++ examples/trials/mnist-keras/search_space.json | 4 + 4 files changed, 197 insertions(+) create mode 100644 examples/trials/mnist-keras/config.yml create mode 100644 examples/trials/mnist-keras/config_pai.yml create mode 100644 examples/trials/mnist-keras/mnist-keras.py create mode 100644 examples/trials/mnist-keras/search_space.json diff --git a/examples/trials/mnist-keras/config.yml b/examples/trials/mnist-keras/config.yml new file mode 100644 index 0000000000..f0f628779f --- /dev/null +++ b/examples/trials/mnist-keras/config.yml @@ -0,0 +1,21 @@ +authorName: default +experimentName: example_mnist-keras +trialConcurrency: 1 +maxExecDuration: 1h +maxTrialNum: 10 +#choice: local, remote, pai +trainingServicePlatform: local +searchSpacePath: search_space.json +#choice: true, false +useAnnotation: false +tuner: + #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner + #SMAC (SMAC should be installed through nnictl) + builtinTunerName: TPE + classArgs: + #choice: maximize, minimize + optimize_mode: maximize +trial: + command: python3 mnist-keras.py + codeDir: . + gpuNum: 0 diff --git a/examples/trials/mnist-keras/config_pai.yml b/examples/trials/mnist-keras/config_pai.yml new file mode 100644 index 0000000000..21e8e6030a --- /dev/null +++ b/examples/trials/mnist-keras/config_pai.yml @@ -0,0 +1,35 @@ +authorName: default +experimentName: example_mnist-keras +trialConcurrency: 1 +maxExecDuration: 1h +maxTrialNum: 10 +#choice: local, remote, pai +trainingServicePlatform: pai +searchSpacePath: search_space.json +#choice: true, false +useAnnotation: false +tuner: + #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner + #SMAC (SMAC should be installed through nnictl) + builtinTunerName: TPE + classArgs: + #choice: maximize, minimize + optimize_mode: maximize +trial: + command: python3 mnist-keras.py + codeDir: . 
+ gpuNum: 0 + cpuNum: 1 + memoryMB: 8196 + #The docker image to run nni job on pai + image: msranni/nni:latest + nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} + containerNFSMountPath: {replace_to_your_container_mount_path} + paiStorageConfigName: {replace_to_your_storage_config_name} +paiConfig: + #The username to login pai + userName: username + #The token to login pai + token: token + #The host of restful server of pai + host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/mnist-keras/mnist-keras.py b/examples/trials/mnist-keras/mnist-keras.py new file mode 100644 index 0000000000..794b7deb2a --- /dev/null +++ b/examples/trials/mnist-keras/mnist-keras.py @@ -0,0 +1,137 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +import argparse +import logging + +import os +import keras +import numpy as np +from keras import backend as K +from keras.callbacks import TensorBoard +from keras.datasets import mnist +from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D +from keras.models import Sequential + +import nni + +LOG = logging.getLogger('mnist_keras') +K.set_image_data_format('channels_last') +TENSORBOARD_DIR = os.environ['NNI_OUTPUT_DIR'] + +H, W = 28, 28 +NUM_CLASSES = 10 + +def create_mnist_model(hyper_params, input_shape=(H, W, 1), num_classes=NUM_CLASSES): + ''' + Create simple convolutional model + ''' + layers = [ + Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape), + Conv2D(64, (3, 3), activation='relu'), + MaxPooling2D(pool_size=(2, 2)), + Flatten(), + Dense(100, activation='relu'), + Dense(num_classes, activation='softmax') + ] + + model = Sequential(layers) + + if hyper_params['optimizer'] == 'Adam': + optimizer = keras.optimizers.Adam(lr=hyper_params['learning_rate']) + else: + optimizer = keras.optimizers.SGD(lr=hyper_params['learning_rate'], momentum=0.9) + model.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer, metrics=['accuracy']) + + return model + +def load_mnist_data(args): + ''' + Load MNIST dataset + ''' + mnist_path = os.path.join(os.environ.get('NNI_OUTPUT_DIR'), 'mnist.npz') + (x_train, y_train), (x_test, y_test) = mnist.load_data(path=mnist_path) + os.remove(mnist_path) + + x_train = (np.expand_dims(x_train, -1).astype(np.float) / 255.)[:args.num_train] + x_test = (np.expand_dims(x_test, -1).astype(np.float) / 255.)[:args.num_test] + y_train = keras.utils.to_categorical(y_train, NUM_CLASSES)[:args.num_train] + y_test = keras.utils.to_categorical(y_test, NUM_CLASSES)[:args.num_test] + + LOG.debug('x_train shape: %s', (x_train.shape,)) + LOG.debug('x_test shape: %s', (x_test.shape,)) + + return x_train, y_train, x_test, y_test + +class SendMetrics(keras.callbacks.Callback): + ''' + Keras callback to send metrics to NNI framework + ''' + def on_epoch_end(self, epoch, logs={}): + ''' + Run on end of each epoch + ''' + LOG.debug(logs) + # TensorFlow 2.0 API reference claims the key is `val_acc`, but in fact it's `val_accuracy` + if 'val_acc' in logs: + nni.report_intermediate_result(logs['val_acc']) + else: + nni.report_intermediate_result(logs['val_accuracy']) + +def train(args, params): + ''' + Train model + ''' + x_train, y_train, x_test, y_test = load_mnist_data(args) + model = create_mnist_model(params) + + model.fit(x_train, y_train, batch_size=args.batch_size, epochs=args.epochs, verbose=1, + validation_data=(x_test, y_test), callbacks=[SendMetrics(), TensorBoard(log_dir=TENSORBOARD_DIR)]) + + _, acc = model.evaluate(x_test, y_test, verbose=0) + LOG.debug('Final result is: %d', acc) + nni.report_final_result(acc) + +def generate_default_params(): + ''' + Generate default hyper parameters + ''' + return { + 'optimizer': 'Adam', + 'learning_rate': 0.001 + } + +if __name__ == '__main__': + PARSER = argparse.ArgumentParser() + PARSER.add_argument("--batch_size", type=int, default=200, help="batch size", required=False) + PARSER.add_argument("--epochs", type=int, default=10, help="Train epochs", required=False) + PARSER.add_argument("--num_train", type=int, default=60000, help="Number of train samples to be used, maximum 60000", required=False) + PARSER.add_argument("--num_test", type=int, default=10000, help="Number of test samples to be used, maximum 10000", required=False) + + ARGS, UNKNOWN = PARSER.parse_known_args() + + 
try: + # get parameters from tuner + RECEIVED_PARAMS = nni.get_next_parameter() + LOG.debug(RECEIVED_PARAMS) + PARAMS = generate_default_params() + PARAMS.update(RECEIVED_PARAMS) + # train + train(ARGS, PARAMS) + except Exception as e: + LOG.exception(e) + raise diff --git a/examples/trials/mnist-keras/search_space.json b/examples/trials/mnist-keras/search_space.json new file mode 100644 index 0000000000..774941a55e --- /dev/null +++ b/examples/trials/mnist-keras/search_space.json @@ -0,0 +1,4 @@ +{ + "optimizer":{"_type":"choice","_value":["Adam", "SGD"]}, + "learning_rate":{"_type":"choice","_value":[0.0001, 0.001, 0.002, 0.005, 0.01]} +} From cb449e4912a1cd3cf728c6934b0692edb0496ea7 Mon Sep 17 00:00:00 2001 From: liuzhe Date: Thu, 3 Jun 2021 15:42:42 +0800 Subject: [PATCH 3/7] bugfix --- nni/experiment/config/convert.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nni/experiment/config/convert.py b/nni/experiment/config/convert.py index 06db0670f7..0b4b332d08 100644 --- a/nni/experiment/config/convert.py +++ b/nni/experiment/config/convert.py @@ -249,13 +249,13 @@ def convert_algo(algo_type, v1, v2): v2_algo = AlgorithmConfig(name=builtin_name, class_args=class_args) else: - class_directory = util.canonical_path(v1_algo.pop('codeDir')) + code_directory = util.canonical_path(v1_algo.pop('codeDir')) class_file_name = v1_algo.pop('classFileName') assert class_file_name.endswith('.py') class_name = class_file_name[:-3] + '.' + v1_algo.pop('className') v2_algo = CustomAlgorithmConfig( class_name=class_name, - class_directory=class_directory, + code_directory=code_directory, class_args=class_args ) From d5ca817a4ddee536776d2f16f542bf0de43d46a0 Mon Sep 17 00:00:00 2001 From: liuzhe Date: Thu, 3 Jun 2021 15:47:06 +0800 Subject: [PATCH 4/7] remove debug code --- nni/tools/nnictl/launcher.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/nni/tools/nnictl/launcher.py b/nni/tools/nnictl/launcher.py index 3d4b82ee3b..9144bc0267 100644 --- a/nni/tools/nnictl/launcher.py +++ b/nni/tools/nnictl/launcher.py @@ -517,17 +517,17 @@ def create_experiment(args): config_v2 = _validate_v2(config_yml, config_path) schema = 2 - #try: - if schema == 1: - launch_experiment(args, config_v1, 'new', experiment_id, 1) - else: - launch_experiment(args, config_v2, 'new', experiment_id, 2) - #except Exception as exception: - # restServerPid = Experiments().get_all_experiments().get(experiment_id, {}).get('pid') - # if restServerPid: - # kill_command(restServerPid) - # print_error(exception) - # exit(1) + try: + if schema == 1: + launch_experiment(args, config_v1, 'new', experiment_id, 1) + else: + launch_experiment(args, config_v2, 'new', experiment_id, 2) + except Exception as exception: + restServerPid = Experiments().get_all_experiments().get(experiment_id, {}).get('pid') + if restServerPid: + kill_command(restServerPid) + print_error(exception) + exit(1) def manage_stopped_experiment(args, mode): '''view a stopped experiment''' From 42dc35068fa7512c2271edae677cd72d304ae480 Mon Sep 17 00:00:00 2001 From: liuzhe Date: Mon, 7 Jun 2021 13:50:56 +0800 Subject: [PATCH 5/7] python -> python3 --- examples/trials/efficientnet/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/trials/efficientnet/config.yml b/examples/trials/efficientnet/config.yml index 46f77b7de8..0849b74477 100644 --- a/examples/trials/efficientnet/config.yml +++ b/examples/trials/efficientnet/config.yml @@ -1,6 +1,6 @@ searchSpaceFile: search_net.json 
trialCodeDirectory: EfficientNet-PyTorch -trialCommand: python main.py /data/imagenet -j 12 -a efficientnet --batch-size 48 --lr 0.048 --wd 1e-5 --epochs 5 --request-from-nni +trialCommand: python3 main.py /data/imagenet -j 12 -a efficientnet --batch-size 48 --lr 0.048 --wd 1e-5 --epochs 5 --request-from-nni trialGpuNumber: 1 trialConcurrency: 4 maxTrialNumber: 100 From 1df4f6684059c4d201213b7bba06d7e8dfc1ae55 Mon Sep 17 00:00:00 2001 From: liuzhe Date: Mon, 7 Jun 2021 15:21:11 +0800 Subject: [PATCH 6/7] add hybrid example --- examples/trials/mnist-pytorch/.gitignore | 1 + .../trials/mnist-pytorch/config_hybrid.yml | 23 +++++++++++++++++++ examples/trials/mnist-tfv2/config_hybrid.yml | 23 +++++++++++++++++++ 3 files changed, 47 insertions(+) create mode 100644 examples/trials/mnist-pytorch/.gitignore create mode 100644 examples/trials/mnist-pytorch/config_hybrid.yml create mode 100644 examples/trials/mnist-tfv2/config_hybrid.yml diff --git a/examples/trials/mnist-pytorch/.gitignore b/examples/trials/mnist-pytorch/.gitignore new file mode 100644 index 0000000000..1269488f7f --- /dev/null +++ b/examples/trials/mnist-pytorch/.gitignore @@ -0,0 +1 @@ +data diff --git a/examples/trials/mnist-pytorch/config_hybrid.yml b/examples/trials/mnist-pytorch/config_hybrid.yml new file mode 100644 index 0000000000..1ae3a2cc3f --- /dev/null +++ b/examples/trials/mnist-pytorch/config_hybrid.yml @@ -0,0 +1,23 @@ +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 +trialConcurrency: 5 +maxTrialNumber: 20 +tuner: + name: TPE + classArgs: + optimize_mode: maximize +# For local, remote, openpai, and aml, NNI can use multiple training services at one time +trainingService: + - platform: local + - platform: remote + machineList: + - host: ${your server's IP or domain name} + user: ${your user name} + ssh_key_file: ~/.ssh/id_rsa + - platform: aml + dockerImage: msranni/nni + subscriptionId: ${your subscription ID} + resourceGroup: ${your resource group} + workspaceName: ${your workspace name} + computeTarget: ${your compute target} diff --git a/examples/trials/mnist-tfv2/config_hybrid.yml b/examples/trials/mnist-tfv2/config_hybrid.yml new file mode 100644 index 0000000000..1ae3a2cc3f --- /dev/null +++ b/examples/trials/mnist-tfv2/config_hybrid.yml @@ -0,0 +1,23 @@ +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 +trialConcurrency: 5 +maxTrialNumber: 20 +tuner: + name: TPE + classArgs: + optimize_mode: maximize +# For local, remote, openpai, and aml, NNI can use multiple training services at one time +trainingService: + - platform: local + - platform: remote + machineList: + - host: ${your server's IP or domain name} + user: ${your user name} + ssh_key_file: ~/.ssh/id_rsa + - platform: aml + dockerImage: msranni/nni + subscriptionId: ${your subscription ID} + resourceGroup: ${your resource group} + workspaceName: ${your workspace name} + computeTarget: ${your compute target} From e43f044d5df91e08eeaead1e58321de276c3b5a4 Mon Sep 17 00:00:00 2001 From: liuzhe Date: Tue, 8 Jun 2021 10:38:19 +0800 Subject: [PATCH 7/7] fix typo and add name field to detailed example --- examples/trials/mnist-pytorch/config.yml | 2 +- examples/trials/mnist-pytorch/config_detailed.yml | 6 ++++-- examples/trials/mnist-tfv2/config.yml | 2 +- examples/trials/mnist-tfv2/config_detailed.yml | 6 ++++-- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/trials/mnist-pytorch/config.yml b/examples/trials/mnist-pytorch/config.yml index 
9f7fac6279..7fd35c0e9a 100644 --- a/examples/trials/mnist-pytorch/config.yml +++ b/examples/trials/mnist-pytorch/config.yml @@ -1,6 +1,6 @@ # This is the minimal config file for an NNI experiment. # Use "nnictl create --config config.yml" to launch this experiment. -# Afterwards, you can check "config_detailed.yml" for more explaination. +# Afterwards, you can check "config_detailed.yml" for more explanation. searchSpaceFile: search_space.json trialCommand: python3 mnist.py # NOTE: change "python3" to "python" if you are using Windows diff --git a/examples/trials/mnist-pytorch/config_detailed.yml b/examples/trials/mnist-pytorch/config_detailed.yml index 4f88ed28fe..69dbf7ec1d 100644 --- a/examples/trials/mnist-pytorch/config_detailed.yml +++ b/examples/trials/mnist-pytorch/config_detailed.yml @@ -1,6 +1,8 @@ # This example shows more configurable fields comparing to the minimal "config.yml" # You can use "nnictl create --config config_detailed.yml" to launch this experiment. -# If you see an error message saying "port 8080 is used", use "nnictl stop --all" to stop previous experiment. +# If you see an error message saying "port 8080 is used", use "nnictl stop --all" to stop previous experiments. + +name: MNIST # An optional name to help you distinguish experiments. # Hyper-parameter search space can either be configured here or in a seperate file. # "config.yml" shows how to specify a seperate search space file. @@ -22,7 +24,7 @@ searchSpace: trialCommand: python3 mnist.py # The command to launch a trial. NOTE: change "python3" to "python" if you are using Windows. trialCodeDirectory: . # The path of trial code. By default it's ".", which means the same directory of this config file. -trialGpuNumber: 1 # How many GPUs should each trial use. CUDA is required when it's greator than zero. +trialGpuNumber: 1 # How many GPUs should each trial use. CUDA is required when it's greater than zero. trialConcurrency: 4 # Run 4 trials concurrently. maxTrialNumber: 10 # Generate at most 10 trials. diff --git a/examples/trials/mnist-tfv2/config.yml b/examples/trials/mnist-tfv2/config.yml index 9f7fac6279..7fd35c0e9a 100644 --- a/examples/trials/mnist-tfv2/config.yml +++ b/examples/trials/mnist-tfv2/config.yml @@ -1,6 +1,6 @@ # This is the minimal config file for an NNI experiment. # Use "nnictl create --config config.yml" to launch this experiment. -# Afterwards, you can check "config_detailed.yml" for more explaination. +# Afterwards, you can check "config_detailed.yml" for more explanation. searchSpaceFile: search_space.json trialCommand: python3 mnist.py # NOTE: change "python3" to "python" if you are using Windows diff --git a/examples/trials/mnist-tfv2/config_detailed.yml b/examples/trials/mnist-tfv2/config_detailed.yml index 17da508b84..77ce535bcc 100644 --- a/examples/trials/mnist-tfv2/config_detailed.yml +++ b/examples/trials/mnist-tfv2/config_detailed.yml @@ -1,6 +1,8 @@ # This example shows more configurable fields comparing to the minimal "config.yml" # You can use "nnictl create --config config_detailed.yml" to launch this experiment. -# If you see an error message saying "port 8080 is used", use "nnictl stop --all" to stop previous experiment. +# If you see an error message saying "port 8080 is used", use "nnictl stop --all" to stop previous experiments. + +name: MNIST # An optional name to help you distinguish experiments. # Hyper-parameter search space can either be configured here or in a seperate file. # "config.yml" shows how to specify a seperate search space file. 
@@ -25,7 +27,7 @@ searchSpace: trialCommand: python3 mnist.py # The command to launch a trial. NOTE: change "python3" to "python" if you are using Windows. trialCodeDirectory: . # The path of trial code. By default it's ".", which means the same directory of this config file. -trialGpuNumber: 1 # How many GPUs should each trial use. CUDA is required when it's greator than zero. +trialGpuNumber: 1 # How many GPUs should each trial use. CUDA is required when it's greater than zero. trialConcurrency: 4 # Run 4 trials concurrently. maxTrialNumber: 10 # Generate at most 10 trials.