Skip to content
This repository has been archived by the owner on Nov 16, 2023. It is now read-only.

Data flow tests #375

Merged
merged 21 commits into from
Jun 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 39 additions & 9 deletions experiments/interpretation/dutchf3_patch/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def _patch_label_2d(

# dump the data right before it's being put into the model and after scoring
if debug:
outdir = f"debug/batch_{split}"
outdir = f"debug/test/batch_{split}"
generate_path(outdir)
for i in range(batch.shape[0]):
path_prefix = f"{outdir}/{batch_indexes[i][0]}_{batch_indexes[i][1]}"
Expand All @@ -251,7 +251,7 @@ def _patch_label_2d(


def _evaluate_split(
split, section_aug, model, pre_processing, output_processing, device, running_metrics_overall, config, debug=False,
split, section_aug, model, pre_processing, output_processing, device, running_metrics_overall, config, data_flow, debug=False,
):
logger = logging.getLogger(__name__)

Expand All @@ -267,28 +267,40 @@ def _evaluate_split(

n_classes = test_set.n_classes

if debug:
data_flow[split] = dict()
data_flow[split]['test_section_loader_length'] = len(test_set)
data_flow[split]['test_input_shape'] = test_set.seismic.shape
data_flow[split]['test_label_shape'] = test_set.labels.shape
data_flow[split]['n_classes'] = n_classes


test_loader = data.DataLoader(test_set, batch_size=1, num_workers=config.WORKERS, shuffle=False)

if debug:
data_flow[split]['test_loader_length'] = len(test_loader)
logger.info("Running in Debug/Test mode")
test_loader = take(2, test_loader)
take_n = 2
test_loader = take(take_n, test_loader)
data_flow[split]['take_n_sections'] = take_n
pred_list, gt_list, img_list = [], [], []


try:
output_dir = generate_path(
f"debug/{config.OUTPUT_DIR}_test_{split}", git_branch(), git_hash(), config.MODEL.NAME, current_datetime(),
f"{config.OUTPUT_DIR}/test/{split}", git_branch(), git_hash(), config.MODEL.NAME, current_datetime(),
)
except:
output_dir = generate_path(f"debug/{config.OUTPUT_DIR}_test_{split}", config.MODEL.NAME, current_datetime(),)
output_dir = generate_path(f"{config.OUTPUT_DIR}/test/{split}", config.MODEL.NAME, current_datetime(),)

running_metrics_split = runningScore(n_classes)


# evaluation mode:
with torch.no_grad(): # operations inside don't track history
model.eval()
total_iteration = 0
for i, (images, labels) in enumerate(test_loader):
logger.info(f"split: {split}, section: {i}")
total_iteration = total_iteration + 1
outputs = _patch_label_2d(
model,
images,
Expand All @@ -306,13 +318,23 @@ def _evaluate_split(

pred = outputs.detach().max(1)[1].numpy()
gt = labels.numpy()
if debug:
pred_list.append((pred.shape, len(np.unique(pred))))
gt_list.append((gt.shape, len(np.unique(gt))))
img_list.append(images.numpy().shape)

running_metrics_split.update(gt, pred)
running_metrics_overall.update(gt, pred)

# dump images to disk for review
mask_to_disk(pred.squeeze(), os.path.join(output_dir, f"{i}_pred.png"), n_classes)
mask_to_disk(gt.squeeze(), os.path.join(output_dir, f"{i}_gt.png"), n_classes)

if debug:
data_flow[split]['pred_shape'] = pred_list
data_flow[split]['gt_shape'] = gt_list
data_flow[split]['img_shape'] = img_list

# get scores
score, class_iou = running_metrics_split.get_scores()

Expand Down Expand Up @@ -363,7 +385,7 @@ def test(*options, cfg=None, debug=False):
load_log_configuration(config.LOG_CONFIG)
logger = logging.getLogger(__name__)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
log_dir, model_name = os.path.split(config.TEST.MODEL_PATH)
log_dir, _ = os.path.split(config.TEST.MODEL_PATH)

# load model:
model = getattr(models, config.MODEL.NAME).get_seg_model(config)
Expand Down Expand Up @@ -396,6 +418,7 @@ def test(*options, cfg=None, debug=False):
output_processing = _output_processing_pipeline(config)

splits = ["test1", "test2"] if "Both" in config.TEST.SPLIT else [config.TEST.SPLIT]
data_flow = dict()
for sdx, split in enumerate(splits):
labels = np.load(path.join(config.DATASET.ROOT, "test_once", split + "_labels.npy"))
section_file = path.join(config.DATASET.ROOT, "splits", "section_" + split + ".txt")
Expand All @@ -409,9 +432,17 @@ def test(*options, cfg=None, debug=False):
device,
running_metrics_overall,
config,
data_flow,
debug=debug,
)

if debug:
config_file_name = "default_config" if not cfg else cfg.split("/")[-1].split(".")[0]

fname = f"data_flow_test_{config_file_name}_{config.TRAIN.MODEL_DIR}.json"
with open(fname, 'w') as f:
json.dump(data_flow, f, indent=1)

# FINAL TEST RESULTS:
score, class_iou = running_metrics_overall.get_scores()

Expand All @@ -434,7 +465,6 @@ def test(*options, cfg=None, debug=False):
np.savetxt(path.join(log_dir, "confusion.csv"), confusion, delimiter=" ")

if debug:
config_file_name = "default_config" if not cfg else cfg.split("/")[-1].split(".")[0]
fname = f"metrics_test_{config_file_name}_{config.TRAIN.MODEL_DIR}.json"
with open(fname, "w") as fid:
json.dump(
Expand Down
23 changes: 20 additions & 3 deletions experiments/interpretation/dutchf3_patch/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,6 @@ def run(*options, cfg=None, local_rank=0, debug=False, input=None, distributed=F
# Set CUDNN benchmark mode:
torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK

# We will write the model under outputs / config_file_name / model_dir
config_file_name = "default_config" if not cfg else cfg.split("/")[-1].split(".")[0]

# Fix random seeds:
torch.manual_seed(config.SEED)
if torch.cuda.is_available():
Expand Down Expand Up @@ -155,16 +152,28 @@ def run(*options, cfg=None, local_rank=0, debug=False, input=None, distributed=F

n_classes = train_set.n_classes
val_set = TrainPatchLoader(config, split="val", is_transform=True, augmentations=val_aug, debug=debug,)

logger.info(val_set)

if debug:
data_flow_dict = dict()

data_flow_dict['train_patch_loader_length'] = len(train_set)
data_flow_dict['validation_patch_loader_length'] = len(val_set)
data_flow_dict['train_input_shape'] = train_set.seismic.shape
data_flow_dict['train_label_shape'] = train_set.labels.shape
data_flow_dict['n_classes'] = n_classes

logger.info("Running in debug mode..")
train_range = min(config.TRAIN.BATCH_SIZE_PER_GPU * config.NUM_DEBUG_BATCHES, len(train_set))
logging.info(f"train range in debug mode {train_range}")
train_set = data.Subset(train_set, range(train_range))
valid_range = min(config.VALIDATION.BATCH_SIZE_PER_GPU, len(val_set))
val_set = data.Subset(val_set, range(valid_range))

data_flow_dict['train_length_subset'] = len(train_set)
data_flow_dict['validation_length_subset'] = len(val_set)

train_sampler = torch.utils.data.distributed.DistributedSampler(train_set, num_replicas=world_size, rank=local_rank)
val_sampler = torch.utils.data.distributed.DistributedSampler(val_set, num_replicas=world_size, rank=local_rank)

Expand All @@ -175,6 +184,14 @@ def run(*options, cfg=None, local_rank=0, debug=False, input=None, distributed=F
val_set, batch_size=config.VALIDATION.BATCH_SIZE_PER_GPU, num_workers=config.WORKERS, sampler=val_sampler
)

if debug:
data_flow_dict['train_loader_length'] = len(train_loader)
data_flow_dict['validation_loader_length'] = len(val_loader)
config_file_name = "default_config" if not cfg else cfg.split("/")[-1].split(".")[0]
fname = f"data_flow_train_{config_file_name}_{config.TRAIN.MODEL_DIR}.json"
with open(fname, 'w') as f:
json.dump(data_flow_dict, f, indent=2)

# Model:
model = getattr(models, config.MODEL.NAME).get_seg_model(config)
device = "cuda" if torch.cuda.is_available() else "cpu"
Expand Down
13 changes: 5 additions & 8 deletions interpretation/deepseismic_interpretation/dutchf3/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def __getitem__(self, index):
im, lbl = _transform_WH_to_HW(im), _transform_WH_to_HW(lbl)

if self.debug and "test" in self.split:
outdir = f"debug/sectionLoader_{self.split}_raw"
outdir = f"debug/test/sectionLoader_{self.split}_raw"
generate_path(outdir)
path_prefix = f"{outdir}/index_{index}_section_{section_name}"
image_to_disk(im, path_prefix + "_img.png", self.MIN, self.MAX)
Expand All @@ -167,7 +167,7 @@ def __getitem__(self, index):
im, lbl = self.transform(im, lbl)

if self.debug and "test" in self.split:
outdir = f"debug/sectionLoader_{self.split}_{'aug' if self.augmentations is not None else 'noaug'}"
outdir = f"debug/test/sectionLoader_{self.split}_{'aug' if self.augmentations is not None else 'noaug'}"
generate_path(outdir)
path_prefix = f"{outdir}/index_{index}_section_{section_name}"
image_to_disk(np.array(im[0]), path_prefix + "_img.png", self.MIN, self.MAX)
Expand Down Expand Up @@ -397,7 +397,7 @@ def __getitem__(self, index):

# dump images before augmentation
if self.debug:
outdir = f"debug/testSectionLoaderWithDepth_{self.split}_raw"
outdir = f"debug/test/testSectionLoaderWithDepth_{self.split}_raw"
generate_path(outdir)
# this needs to take the first dimension of image (no depth) but lbl only has 1 dim
path_prefix = f"{outdir}/index_{index}_section_{section_name}"
Expand All @@ -416,7 +416,7 @@ def __getitem__(self, index):
# dump images and labels to disk after augmentation
if self.debug:
outdir = (
f"debug/testSectionLoaderWithDepth_{self.split}_{'aug' if self.augmentations is not None else 'noaug'}"
f"debug/test/testSectionLoaderWithDepth_{self.split}_{'aug' if self.augmentations is not None else 'noaug'}"
)
generate_path(outdir)
path_prefix = f"{outdir}/index_{index}_section_{section_name}"
Expand Down Expand Up @@ -773,9 +773,6 @@ def __repr__(self):
"patch": TrainPatchLoaderWithDepth,
}

_TRAIN_SECTION_LOADERS = {"section": TrainSectionLoaderWithDepth}


def get_patch_loader(cfg):
assert str(cfg.TRAIN.DEPTH).lower() in [
"section",
Expand All @@ -785,6 +782,7 @@ def get_patch_loader(cfg):
Valid values: section, patch, none."
return _TRAIN_PATCH_LOADERS.get(cfg.TRAIN.DEPTH, TrainPatchLoader)

_TRAIN_SECTION_LOADERS = {"section": TrainSectionLoaderWithDepth}

def get_section_loader(cfg):
assert str(cfg.TRAIN.DEPTH).lower() in [
Expand All @@ -797,7 +795,6 @@ def get_section_loader(cfg):

_TEST_LOADERS = {"section": TestSectionLoaderWithDepth}


def get_test_loader(cfg):
logger = logging.getLogger(__name__)
logger.info(f"Test loader {cfg.TRAIN.DEPTH}")
Expand Down
30 changes: 26 additions & 4 deletions tests/cicd/main_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,15 @@ jobs:
echo "cv_lib unit test job passed"

###################################################################################################
# Stage 3: Dutch F3 patch models on checkerboard test set:
# Stage 3: Patch models on checkerboard test set:
# deconvnet, unet, HRNet patch depth, HRNet section depth
# CAUTION: these builds were reverted to single-GPU; the new multi-GPU code is left in, to be reverted later
###################################################################################################

- job: checkerboard_dutchf3_patch
- job: checkerboard_patch
dependsOn: cv_lib_unit_tests_job
timeoutInMinutes: 60
displayName: Checkerboard Dutch F3 patch local
displayName: Checkerboard patch local
pool:
name: deepseismicagentpool
steps:
Expand All @@ -148,6 +148,7 @@ jobs:
'TRAIN.END_EPOCH' 2 'TRAIN.SNAPSHOTS' 1 \
'DATASET.NUM_CLASSES' 2 'DATASET.CLASS_WEIGHTS' '[1.0, 1.0]' \
'TRAIN.DEPTH' 'none' \
'TRAIN.BATCH_SIZE_PER_GPU' 16 'VALIDATION.BATCH_SIZE_PER_GPU' 32 \
'OUTPUT_DIR' 'output' 'TRAIN.MODEL_DIR' 'no_depth' \
'WORKERS' 1 \
--cfg=configs/patch_deconvnet.yaml --debug ; echo "$?" > "$dir/$BASHPID"; }
Expand All @@ -158,6 +159,7 @@ jobs:
'TRAIN.END_EPOCH' 1 'TRAIN.SNAPSHOTS' 1 \
'DATASET.NUM_CLASSES' 2 'DATASET.CLASS_WEIGHTS' '[1.0, 1.0]' \
'TRAIN.DEPTH' 'section' \
'TRAIN.BATCH_SIZE_PER_GPU' 16 'VALIDATION.BATCH_SIZE_PER_GPU' 32 \
'OUTPUT_DIR' 'output' 'TRAIN.MODEL_DIR' 'section_depth' \
'WORKERS' 1 \
--cfg=configs/unet.yaml --debug ; echo "$?" > "$dir/$BASHPID"; }
Expand All @@ -168,6 +170,7 @@ jobs:
'TRAIN.END_EPOCH' 1 'TRAIN.SNAPSHOTS' 1 \
'DATASET.NUM_CLASSES' 2 'DATASET.CLASS_WEIGHTS' '[1.0, 1.0]' \
'TRAIN.DEPTH' 'section' \
'TRAIN.BATCH_SIZE_PER_GPU' 16 'VALIDATION.BATCH_SIZE_PER_GPU' 32 \
'OUTPUT_DIR' 'output' 'TRAIN.MODEL_DIR' 'section_depth' \
'WORKERS' 1 \
--cfg=configs/seresnet_unet.yaml --debug ; echo "$?" > "$dir/$BASHPID"; }
Expand All @@ -178,6 +181,7 @@ jobs:
'TRAIN.END_EPOCH' 2 'TRAIN.SNAPSHOTS' 1 \
'DATASET.NUM_CLASSES' 2 'DATASET.CLASS_WEIGHTS' '[1.0, 1.0]' \
'TRAIN.DEPTH' 'section' \
'TRAIN.BATCH_SIZE_PER_GPU' 16 'VALIDATION.BATCH_SIZE_PER_GPU' 32 \
'MODEL.PRETRAINED' '/home/alfred/models/hrnetv2_w48_imagenet_pretrained.pth' \
'OUTPUT_DIR' 'output' 'TRAIN.MODEL_DIR' 'section_depth' \
'WORKERS' 1 \
Expand All @@ -195,6 +199,16 @@ jobs:

# Remove the temporary directory
rm -r "$dir"


set -e
python ../../../tests/cicd/src/check_data_flow.py --infile data_flow_train_patch_deconvnet_no_depth.json --step train --train_depth none
python ../../../tests/cicd/src/check_data_flow.py --infile data_flow_train_unet_section_depth.json --step train --train_depth section
python ../../../tests/cicd/src/check_data_flow.py --infile data_flow_train_seresnet_unet_section_depth.json --step train --train_depth section
python ../../../tests/cicd/src/check_data_flow.py --infile data_flow_train_hrnet_section_depth.json --step train --train_depth section
set +e



# check validation set performance
set -e
Expand Down Expand Up @@ -286,6 +300,14 @@ jobs:
# Remove the temporary directory
rm -r "$dir"

# check data flow for test
set -e
python ../../../tests/cicd/src/check_data_flow.py --infile data_flow_test_patch_deconvnet_no_depth.json --step test --train_depth none
python ../../../tests/cicd/src/check_data_flow.py --infile data_flow_test_unet_section_depth.json --step test --train_depth section
python ../../../tests/cicd/src/check_data_flow.py --infile data_flow_test_seresnet_unet_section_depth.json --step test --train_depth section
python ../../../tests/cicd/src/check_data_flow.py --infile data_flow_test_hrnet_section_depth.json --step test --train_depth section
set +e

# check test set performance
set -e
# TODO: enable this after investigating the reproducibility problem of patch_deconvnet for small-size training data
Expand All @@ -304,7 +326,7 @@ jobs:
###################################################################################################

- job: F3_block_training_and_evaluation_local_notebook
dependsOn: checkerboard_dutchf3_patch
dependsOn: checkerboard_patch
timeoutInMinutes: 5
displayName: F3 block training and evaluation local notebook
pool:
Expand Down
Loading