#!/bin/bash
# number of GPUs to train on
NGPU=8
# path to the pretrained HRNet backbone weights
PRETRAINED_HRNET='/home/maxkaz/models/hrnetv2_w48_imagenet_pretrained.pth'
# DATA_F3='/home/alfred/data/dutch_f3/data'
# DATA_PENOBSCOT='/home/maxkaz/data/penobscot'
DATA_F3='/storage/data/dutchf3/data'
DATA_PENOBSCOT='/storage/data/penobscot'
# subdirectory where results are written
OUTPUT_DIR='output'
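# A hedged sanity check (not in the original script): warn if any of the assumed
# data/model paths above are missing before launching hours of training. The paths
# are machine-specific assumptions; adjust them to your environment.
for p in "${PRETRAINED_HRNET}" "${DATA_F3}" "${DATA_PENOBSCOT}"; do
    if [ ! -e "${p}" ]; then
        echo "WARNING: expected path ${p} does not exist" >&2
    fi
done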
unset CUDA_VISIBLE_DEVICES
# workaround: source conda's profile script so 'conda activate' works in a non-interactive bash shell
source /data/anaconda/etc/profile.d/conda.sh
conda activate seismic-interpretation
export PYTHONPATH=/storage/repos/forks/seismic-deeplearning-1/interpretation:$PYTHONPATH
cd experiments/interpretation/dutchf3_patch/distributed/ || exit 1
# patch-based deconvnet without skip connections
nohup time python -m torch.distributed.launch --nproc_per_node=${NGPU} train.py \
'DATASET.ROOT' "${DATA_F3}" \
'TRAIN.DEPTH' 'none' \
'OUTPUT_DIR' "${OUTPUT_DIR}" 'TRAIN.MODEL_DIR' 'no_depth' \
--cfg=configs/patch_deconvnet.yaml > patch_deconvnet.log 2>&1
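# (Explanatory note, inferred from the argument pattern above) the quoted KEY VALUE
# pairs are config overrides applied on top of the YAML file passed via --cfg;
# the same pattern is reused for every run below.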
# patch-based deconvnet with skip connections
nohup time python -m torch.distributed.launch --nproc_per_node=${NGPU} train.py \
'DATASET.ROOT' "${DATA_F3}" \
'TRAIN.DEPTH' 'none' \
'OUTPUT_DIR' "${OUTPUT_DIR}" 'TRAIN.MODEL_DIR' 'no_depth' \
--cfg=configs/patch_deconvnet_skip.yaml > patch_deconvnet_skip.log 2>&1
# squeeze-excitation ResNet U-Net + section depth
nohup time python -m torch.distributed.launch --nproc_per_node=${NGPU} train.py \
'DATASET.ROOT' "${DATA_F3}" \
'TRAIN.DEPTH' 'section' \
'OUTPUT_DIR' "${OUTPUT_DIR}" 'TRAIN.MODEL_DIR' 'section_depth' \
--cfg=configs/seresnet_unet.yaml > seresnet_unet.log 2>&1
# HRNet + patch depth
nohup time python -m torch.distributed.launch --nproc_per_node=${NGPU} train.py \
'DATASET.ROOT' "${DATA_F3}" \
'TRAIN.DEPTH' 'patch' \
'MODEL.PRETRAINED' "${PRETRAINED_HRNET}" \
'OUTPUT_DIR' "${OUTPUT_DIR}" 'TRAIN.MODEL_DIR' 'patch_depth' \
--cfg=configs/hrnet.yaml > hrnet_patch.log 2>&1
# HRNet + section depth
nohup time python -m torch.distributed.launch --nproc_per_node=${NGPU} train.py \
'DATASET.ROOT' "${DATA_F3}" \
'TRAIN.DEPTH' 'section' \
'MODEL.PRETRAINED' "${PRETRAINED_HRNET}" \
'OUTPUT_DIR' "${OUTPUT_DIR}" 'TRAIN.MODEL_DIR' 'section_depth' \
--cfg=configs/hrnet.yaml > hrnet_section.log 2>&1
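# (Assumed workflow note, not part of the original script) each run above writes its
# own log file in the current directory; progress can be followed with, e.g.:
#   tail -f hrnet_section.log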
echo "TADA"