Running attention_net_validation_loss #8

Merged
merged 40 commits into from
Dec 9, 2020
Changes from 36 commits

Commits (40)
1c16216
running and evaluation of attention model
ggantos Oct 9, 2020
20190ae
commit before Pycharm
ggantos Oct 12, 2020
3ac6758
running hyperparameter tuning
ggantos Oct 27, 2020
7c57776
adding validation loss function
ggantos Oct 30, 2020
849b0ff
losses unit test and fixed loss calcs
ggantos Nov 3, 2020
221af10
ready to start test runs for attn model
ggantos Nov 3, 2020
c6995ad
getting attn running
ggantos Nov 3, 2020
d0717a0
getting attention net loss to work with correct shape
ggantos Nov 3, 2020
b4fd4d6
working on losses_test
ggantos Nov 5, 2020
50ca278
adding optuna objective function
ggantos Nov 5, 2020
61192fe
running optuna on casper
ggantos Nov 5, 2020
8c43751
adding callbacks to optuna
ggantos Nov 5, 2020
788d0b8
adding callbacks to objective.py
ggantos Nov 5, 2020
9e27f5b
fixing losses.py
ggantos Nov 5, 2020
7d3d105
fixing optuna callback
ggantos Nov 5, 2020
f764cca
adding get_callbacks import
ggantos Nov 5, 2020
6465d1a
fixing callback bug
ggantos Nov 5, 2020
fc47701
fixing small typos
ggantos Nov 5, 2020
6aac819
True for gpu
ggantos Nov 5, 2020
fd586f0
running attention
ggantos Nov 6, 2020
f8c456b
running attention with attention_net_validation_loss
ggantos Nov 6, 2020
88348e5
getting Optuna running
ggantos Nov 6, 2020
ecec712
deleting unnecessary notebook
ggantos Nov 6, 2020
1adeabd
plotting evaluation of net_attention_validation_loss function
ggantos Nov 6, 2020
9bb3723
adding saves to objective.py
ggantos Nov 9, 2020
3ad21a6
adding in xarray import
ggantos Nov 9, 2020
a98720f
adding imports
ggantos Nov 9, 2020
2fba929
adding more imports
ggantos Nov 9, 2020
2306388
adding even more imports
ggantos Nov 10, 2020
eb41bc4
running attn tests
ggantos Nov 10, 2020
b8b35cd
fixing typo
ggantos Nov 10, 2020
b7cea77
getting Optuna running
ggantos Nov 10, 2020
69e2512
running attention_net_validation_loss models
ggantos Nov 10, 2020
0d60f7a
renaming loss functions
ggantos Nov 11, 2020
05c3c9b
testing eager execution false
ggantos Nov 13, 2020
8fdc2bb
loss calculated from predicted real particles
ggantos Nov 17, 2020
95dd9c6
evaluating loss function from predicted particle perspective
ggantos Nov 24, 2020
d7c4ddb
random_valid_outputs scaled between 0 and 1
ggantos Nov 24, 2020
9c8dcdd
running attn with scaled random outputs
ggantos Nov 24, 2020
4ffa87f
fixing items loss for PR review
ggantos Dec 4, 2020
4 changes: 3 additions & 1 deletion .gitignore
@@ -4,7 +4,9 @@
*.out
*.o
*.out.*
batch**.sh
*.o*
.idea
holodecml.egg-info/
data/
models/

6 changes: 3 additions & 3 deletions config/attn.yml
@@ -1,5 +1,5 @@
path_data: "/glade/p/cisl/aiml/ai4ess_hackathon/holodec/"
path_save: "/glade/p/cisl/aiml/holodec/testing/attention/"
path_save: "/glade/p/cisl/aiml/ggantos/holodec/attn/predicted/"
model_name: "attn"
num_particles: "large"
random_seed: 328942
@@ -16,9 +16,9 @@ attention_network:
hidden_neurons: 100
activation: "relu"
min_filters: 16
output_num: 5
output_num: 5
train:
learning_rate: 0.001
epochs: 20
epochs: 2
batch_size: 64
verbose: 1
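
A minimal sketch of how a configuration like attn.yml is typically consumed; the loading code below is an assumption for illustration (only the keys shown in the diff are real), not the repository's actual entry point:

import yaml

# Load the attention experiment configuration (path is illustrative).
with open("config/attn.yml") as f:
    conf = yaml.safe_load(f)

net_conf = conf["attention_network"]   # hidden_neurons, activation, min_filters, output_num
train_conf = conf["train"]             # learning_rate, epochs, batch_size, verbose

print(conf["model_name"], train_conf["epochs"], train_conf["batch_size"])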
33 changes: 33 additions & 0 deletions config/attn_optuna.yml
@@ -0,0 +1,33 @@
path_data: "/glade/p/cisl/aiml/ai4ess_hackathon/holodec/"
path_save: "/glade/p/cisl/aiml/ggantos/holodec/optuna/test/"
model_name: "attn"
num_particles: "large"
random_seed: 328942
output_cols: ["x", "y", "z", "d", "hid"]
scaler_out: "MinMaxScaler"
num_z_bins: False
subset: 0.1
mass: False
metric: "mae"
noisy_sd: 0.1
attention_network:
activation: "relu"
output_num: 5
train:
epochs: 100
batch_size: 64
verbose: 1
callbacks:
EarlyStopping:
monitor: "val_loss"
patience: 3
ReduceLROnPlateau:
monitor: "val_loss"
factor: 0.2
patience: 1
min_lr: 0.0000001
mode: "auto"
CSVLogger:
filename: "/glade/p/cisl/aiml/ggantos/holodec/optuna/test/training.txt"
separator: " "
append: True
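
The commit history shows callbacks being wired in through a get_callbacks helper; the sketch below is an assumed, minimal version of how the callbacks block above could be mapped onto Keras objects, not the project's actual implementation:

import tensorflow as tf

def build_callbacks(callback_conf):
    # Map each entry of the YAML "callbacks" block onto the Keras callback
    # class of the same name, passing its settings through as keyword arguments.
    available = {
        "EarlyStopping": tf.keras.callbacks.EarlyStopping,
        "ReduceLROnPlateau": tf.keras.callbacks.ReduceLROnPlateau,
        "CSVLogger": tf.keras.callbacks.CSVLogger,
    }
    return [available[name](**kwargs)
            for name, kwargs in callback_conf.items() if name in available]

# e.g. callbacks = build_callbacks(conf["callbacks"]); model.fit(..., callbacks=callbacks)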
58 changes: 58 additions & 0 deletions config/hyperparameter.yml
@@ -0,0 +1,58 @@
log:
save_path: "/glade/p/cisl/aiml/ggantos/holodec/optuna/test/log.txt"

slurm:
jobs: 20
kernel: "ncar_pylib ncar_20200417"
batch:
account: "NAML0001"
gres: "gpu:v100:1"
mem: "256G"
n: 8
t: "12:00:00"
J: "hyper_opt"
o: "hyper_opt.out"
e: "hyper_opt.err"

optuna:
name: "holodec_optimization.db"
reload: 0
objective: "/glade/work/ggantos/holodec-ml/scripts/ggantos/objective.py"
direction: "minimize"
metric: "val_loss"
n_trials: 20
gpu: True
save_path: "/glade/p/cisl/aiml/ggantos/holodec/optuna/test/"
sampler:
type: "TPESampler"
parameters:
attention_neurons:
type: "int"
settings:
name: "attention_neurons"
low: 50
high: 150
hidden_layers:
type: "int"
settings:
name: "hidden_layers"
low: 1
high: 4
hidden_neurons:
type: "int"
settings:
name: "hidden_neurons"
low: 50
high: 150
min_filters:
type: "int"
settings:
name: "min_filters"
low: 8
high: 24
learning_rate:
type: "float"
settings:
name: "learning_rate"
low: 0.00001
high: 0.01
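
A hedged sketch of how an Optuna objective might turn the parameter ranges above into trial suggestions; the real objective lives in scripts/ggantos/objective.py, so the function below and the exact nesting of the "parameters" block are assumptions:

import optuna

def suggest_hyperparameters(trial, parameters):
    # "parameters" is the parsed parameters block from hyperparameter.yml; each
    # entry carries a type and the keyword arguments for the suggest call.
    suggested = {}
    for key, spec in parameters.items():
        if spec["type"] == "int":
            suggested[key] = trial.suggest_int(**spec["settings"])
        elif spec["type"] == "float":
            suggested[key] = trial.suggest_float(**spec["settings"])
    return suggested

# Inside an objective(trial), this would yield attention_neurons, hidden_layers,
# hidden_neurons, min_filters, and learning_rate drawn from the ranges above,
# with the study minimizing val_loss as configured.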
21 changes: 21 additions & 0 deletions config/zdist_FT_radavg.yml
@@ -0,0 +1,21 @@
path_data: "/glade/p/cisl/aiml/ai4ess_hackathon/holodec/ft_rad_bidis_z/"
path_save: "/glade/p/cisl/aiml/ggantos/holodec/ft_rad_bidis_models/z/"
model_name: "cnn"
random_seed: 328942
input_variable: 'input_image'
label_variable: 'histogram'
metric: "mae"
conv2d_network:
filters: [4, 8, 16]
kernel_sizes: [5, 5, 5]
conv2d_activation: "relu"
pool_sizes: [0, 0, 0]
dense_sizes: [64, 32, 16]
dense_activation: "elu"
lr: 0.001
optimizer: "adam"
loss: "categorical_crossentropy"
batch_size: 256
metrics: ["TP","FP","TN","FN"]
epochs: 20
verbose: 1
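
For context, a hedged sketch of the kind of Keras model the conv2d_network block describes; layer choices beyond the listed keys (for example the softmax head sized to the histogram bins) are assumptions rather than the repository's builder:

import tensorflow as tf

def build_conv2d_network(conf, input_shape, n_bins):
    # Stack Conv2D (and optional pooling) layers, then the dense head, following
    # the filters/kernel_sizes/pool_sizes/dense_sizes lists from the config.
    model = tf.keras.Sequential([tf.keras.Input(shape=input_shape)])
    for filters, kernel, pool in zip(conf["filters"], conf["kernel_sizes"], conf["pool_sizes"]):
        model.add(tf.keras.layers.Conv2D(filters, kernel, activation=conf["conv2d_activation"]))
        if pool > 0:
            model.add(tf.keras.layers.MaxPooling2D(pool))
    model.add(tf.keras.layers.Flatten())
    for neurons in conf["dense_sizes"]:
        model.add(tf.keras.layers.Dense(neurons, activation=conf["dense_activation"]))
    model.add(tf.keras.layers.Dense(n_bins, activation="softmax"))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=conf["lr"]),
                  loss=conf["loss"])
    return model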
54 changes: 40 additions & 14 deletions holodecml/data.py
@@ -1,10 +1,8 @@
import os
import random
import xarray as xr
import numpy as np
import pandas as pd
from datetime import datetime
import socket

import numpy as np
import xarray as xr

num_particles_dict = {
1 : '1particle',
@@ -17,6 +15,12 @@
'test' : 'test',
'valid': 'validation'}

def get_dataset_path():
if 'casper' in socket.gethostname():
return "/glade/p/cisl/aiml/ai4ess_hackathon/holodec/"
else:
return "/Users/ggantos/PycharmProjects/holodec-ml/data/"

def dataset_name(num_particles, split, file_extension='nc'):
"""
Return the dataset filename given user inputs
@@ -185,24 +189,44 @@ def calc_z_bins(train_outputs, valid_outputs, num_z_bins):
z_bins = np.linspace(z_min, z_max, num_z_bins)
return z_bins

# added this because the previous code allowed a different max_particle size
# depending on which split df was opened and the subset
def get_max_particles(path_data, num_particles, output_cols):
ds = open_dataset(path_data, num_particles, "train")
outputs = ds[output_cols].to_dataframe()
max_particles = outputs['hid'].value_counts().max()
return max_particles

# updated function to create the entire dataset template at one time to
# decrease overhead and eliminate setting random seeds
def make_template(df, num_images):
max_particles = df['hid'].value_counts().max()
size = (max_particles * num_images, 1)
def make_template(df, num_images, max_particles):
size = (num_images * max_particles, 1)
x = np.random.uniform(low=df['x'].min(), high=df['x'].max(), size=size)
y = np.random.uniform(low=df['y'].min(), high=df['y'].max(), size=size)
z = np.random.uniform(low=df['z'].min(), high=df['z'].max(), size=size)
d = np.random.uniform(low=df['d'].min(), high=df['d'].max(), size=size)
prob = np.zeros(d.shape)
template = np.hstack((x, y ,z ,d ,prob))
template = template.reshape((num_images, max_particles, -1))
return template
return template

def make_random_valid_outputs(path_data, num_particles, num_images,
max_particles):
df = open_dataset(path_data, num_particles, 'valid')
size = (num_images * max_particles, 1)
x = np.random.uniform(low=df['x'].min(), high=df['x'].max(), size=size)
y = np.random.uniform(low=df['y'].min(), high=df['y'].max(), size=size)
z = np.random.uniform(low=df['z'].min(), high=df['z'].max(), size=size)
d = np.random.uniform(low=df['d'].min(), high=df['d'].max(), size=size)
prob = np.zeros(d.shape)
template = np.hstack((x, y, z, d, prob))
template = template.reshape((num_images, max_particles, -1))
return template

# cycles through dataset by "hid" to overwrite random data generated in
# make_template with actual data and classification of 1
def outputs_3d(outputs, num_images):
outputs_array = make_template(outputs, num_images)
def outputs_3d(outputs, num_images, max_particles):
outputs_array = make_template(outputs, num_images, max_particles)
for hid in outputs["hid"].unique():
outputs_hid = outputs.loc[outputs['hid'] == hid].to_numpy()
outputs_hid[:, -1] = 1
@@ -231,7 +255,6 @@ def load_scaled_datasets(path_data, num_particles, output_cols,
valid_inputs: (np array) Valid input data scaled between 0 and 1
valid_outputs: (np array) Scaled valid output data
"""

train_inputs,\
train_outputs = load_raw_datasets(path_data, num_particles, 'train',
output_cols, subset)
@@ -259,10 +282,13 @@ def load_scaled_datasets(path_data, num_particles, output_cols,
else:
if train_inputs.shape[0] != train_outputs.shape[0]:
col = [c for c in output_cols if c != 'hid']
max_particles = get_max_particles(path_data, num_particles, output_cols)
train_outputs[col] = scaler_out.fit_transform(train_outputs[col])
train_outputs = outputs_3d(train_outputs, train_inputs.shape[0])
train_outputs = outputs_3d(train_outputs, train_inputs.shape[0],
max_particles)
valid_outputs[col] = scaler_out.transform(valid_outputs[col])
valid_outputs = outputs_3d(valid_outputs, valid_inputs.shape[0])
valid_outputs = outputs_3d(valid_outputs, valid_inputs.shape[0],
max_particles)
else:
train_outputs.drop(['hid'], axis=1)
train_outputs = scaler_out.fit_transform(train_outputs)
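
A small, hedged illustration of how the refactored data helpers fit together; the toy DataFrame below stands in for the real HOLODEC outputs (only the column names come from the diff):

import pandas as pd
from holodecml.data import make_template

# Toy stand-in for an outputs table: two holograms ("hid"), three particles total.
outputs = pd.DataFrame({
    "x": [0.1, 0.4, 0.7],
    "y": [0.2, 0.5, 0.8],
    "z": [0.3, 0.6, 0.9],
    "d": [10.0, 12.0, 14.0],
    "hid": [1, 1, 2],
})

num_images = outputs["hid"].nunique()                # 2 holograms
max_particles = outputs["hid"].value_counts().max()  # at most 2 particles per hologram

# Passing max_particles explicitly (the point of get_max_particles) means the
# train and valid splits now share one padded shape instead of each deriving
# its own maximum from whatever subset happened to be loaded.
template = make_template(outputs, num_images, max_particles)
print(template.shape)  # (2, 2, 5): num_images x max_particles x (x, y, z, d, prob)

# outputs_3d(outputs, num_images, max_particles) would then overwrite the random
# rows of this template with the real particles, setting their probability to 1.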
111 changes: 101 additions & 10 deletions holodecml/losses.py
@@ -1,45 +1,136 @@
import logging
import tensorflow as tf
from typing import List, Dict
import tensorflow.keras.backend as K

logger = logging.getLogger(__name__)


class SymmetricCrossEntropy:

def __init__(self, a: float = 1.0, b: float = 1.0) -> None:
self.a = a
self.b = b

def __call__(self, *args, **kwargs) -> float:
bce = tf.keras.losses.CategoricalCrossentropy()
kld = tf.keras.losses.KLDivergence()
return self.a * bce(*args, **kwargs) + self.b * kld(*args, **kwargs)


def rmse(y_true, y_pred):
return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))


def wmse(y_true, y_pred):
return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))


def R2(y_true, y_pred):
""" Is actually 1 - R2
"""
SS_res = K.sum(K.square(y_true - y_pred))
SS_res = K.sum(K.square(y_true - y_pred))
SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
return SS_res/(SS_tot + K.epsilon())
return SS_res / (SS_tot + K.epsilon())


def keras_mse(y_true, y_pred):
return K.mean(K.square(y_pred - y_true))


def attention_net_loss(y_true, y_pred):
def noisy_true_particle_loss(y_true, y_pred):
# y_true and y_pred will have shape (batch_size x max_num_particles x 5)
loss_real = tf.reduce_mean(tf.abs(y_true[y_true[:, :, -1] > 0] - y_pred[y_true[:, :, -1] > 0]))
loss_bce = binary_crossentropy(y_true[:,:,-1],
y_pred[:,:,-1])
loss_bce = tf.keras.losses.binary_crossentropy(tf.reshape(y_true[:, :, -1],[-1]),
tf.reshape(y_pred[:, :, -1],[-1]))
loss_total = loss_real + loss_bce
return loss_total

def random_particle_distance_loss(y_true, y_pred):
loss_dist = tf.zeros((), dtype=tf.float32)
loss_diam = tf.zeros((), dtype=tf.float32)
loss_prob = tf.zeros((), dtype=tf.float32)
loss_bce = tf.zeros((), dtype=tf.float32)

for h in range(tf.shape(y_true)[0]):
y_true_h = y_true[h:h + 1][y_true[h:h + 1, :, -1] > 0]
dist_x = (y_true_h[:, 0:1] - tf.transpose(y_pred)[0:1, :, h]) ** 2
dist_y = (y_true_h[:, 1:2] - tf.transpose(y_pred)[1:2, :, h]) ** 2
dist_z = (y_true_h[:, 2:3] - tf.transpose(y_pred)[2:3, :, h]) ** 2
dist_squared = tf.math.sqrt(dist_x + dist_y + dist_z)
loss_dist_h = tf.math.reduce_sum(tf.math.reduce_min(dist_squared, axis=1))
loss_dist = loss_dist + loss_dist_h

max_idx = tf.cast(tf.math.argmin(dist_squared, axis=1), dtype=tf.int32)
max_idx_2d = tf.stack((tf.range(tf.shape(dist_squared)[0]), max_idx), axis=-1)

dist_d = (y_true_h[:, 3:4] - tf.transpose(y_pred)[3:4, :, h]) ** 2
loss_diam_h = tf.math.reduce_sum(tf.gather_nd(dist_d, max_idx_2d))
loss_diam = loss_diam + loss_diam_h

dist_p = (y_true_h[:, 4:5] - tf.transpose(y_pred)[4:5, :, h]) ** 2
loss_prob_h = tf.math.reduce_sum(tf.gather_nd(dist_p, max_idx_2d))
loss_prob = loss_prob + loss_prob_h

y_pred_h_bce = y_pred[h, :, -1]
loss_bce_h = tf.keras.losses.binary_crossentropy(y_true_h[:, -1],
tf.gather(y_pred_h_bce,max_idx))
loss_bce = loss_bce + loss_bce_h

loss_dist = loss_dist/tf.cast(tf.shape(y_true)[0], dtype=tf.float32)
loss_diam = loss_diam/tf.cast(tf.shape(y_true)[0], dtype=tf.float32)
loss_prob = loss_prob/tf.cast(tf.shape(y_true)[0], dtype=tf.float32)
loss_bce = loss_bce/tf.cast(tf.shape(y_true)[0], dtype=tf.float32)

valid_error = loss_dist + loss_diam + loss_bce

return valid_error

def predicted_particle_distance_loss(y_true, y_pred):
loss_dist = tf.zeros((), dtype=tf.float32)
loss_diam = tf.zeros((), dtype=tf.float32)
loss_prob = tf.zeros((), dtype=tf.float32)
loss_bce = tf.zeros((), dtype=tf.float32)

for h in range(tf.shape(y_pred)[0]):
print(f"Mean: {tf.math.reduce_mean(y_pred[h:h + 1, :, -1])}")
print(f"Mean: {tf.math.reduce_mean(y_true[h:h + 1, :, -1])}")
print(f"Min: {tf.math.reduce_min(y_pred[h:h + 1, :, -1])}")
print(f"Min: {tf.math.reduce_min(y_true[h:h + 1, :, -1])}")
print(f"Max: {tf.math.reduce_max(y_pred[h:h + 1, :, -1])}")
print(f"Max: {tf.math.reduce_max(y_true[h:h + 1, :, -1])}")
y_pred_h = y_pred[h:h + 1][y_pred[h:h + 1, :, -1] > 0.5]
dist_x = (y_pred_h[:, 0:1] - tf.transpose(y_true)[0:1, :, h]) ** 2
dist_y = (y_pred_h[:, 1:2] - tf.transpose(y_true)[1:2, :, h]) ** 2
dist_z = (y_pred_h[:, 2:3] - tf.transpose(y_true)[2:3, :, h]) ** 2
dist_squared = tf.math.sqrt(dist_x + dist_y + dist_z)
loss_dist_h = tf.math.reduce_sum(tf.math.reduce_min(dist_squared, axis=1))
loss_dist = loss_dist + loss_dist_h

max_idx = tf.cast(tf.math.argmin(dist_squared, axis=1), dtype=tf.int32)
max_idx_2d = tf.stack((tf.range(tf.shape(dist_squared)[0]), max_idx), axis=-1)

dist_d = (y_pred_h[:, 3:4] - tf.transpose(y_true)[3:4, :, h]) ** 2
loss_diam_h = tf.math.reduce_sum(tf.gather_nd(dist_d, max_idx_2d))
loss_diam = loss_diam + loss_diam_h

dist_p = (y_pred_h[:, 4:5] - tf.transpose(y_true)[4:5, :, h]) ** 2
loss_prob_h = tf.math.reduce_sum(tf.gather_nd(dist_p, max_idx_2d))
loss_prob = loss_prob + loss_prob_h

y_true_h_bce = y_true[h, :, -1]
loss_bce_h = tf.keras.losses.binary_crossentropy(y_pred_h[:, -1],
tf.gather(y_true_h_bce,max_idx))
loss_bce = loss_bce + loss_bce_h

loss_dist = loss_dist/tf.cast(tf.shape(y_pred)[0], dtype=tf.float32)
loss_diam = loss_diam/tf.cast(tf.shape(y_pred)[0], dtype=tf.float32)
loss_prob = loss_prob/tf.cast(tf.shape(y_pred)[0], dtype=tf.float32)
loss_bce = loss_bce/tf.cast(tf.shape(y_pred)[0], dtype=tf.float32)

# print(f"loss_dist: {loss_dist}\ttf.shape(loss_dist): {tf.shape(loss_dist)}")
# print(f"loss_diam: {loss_diam}\ttf.shape(loss_diam): {tf.shape(loss_diam)}")
# print(f"loss_dist: {loss_prob}\ttf.shape(loss_prob): {tf.shape(loss_prob)}")
# print(f"loss_dist: {loss_bce}\ttf.shape(loss_bce): {tf.shape(loss_bce)}")
valid_error = loss_dist + loss_diam + loss_bce

return valid_error
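
A hedged smoke test for the new losses on dummy tensors, run eagerly; it assumes only the (batch_size, max_particles, 5) layout described above, with the last column holding the particle probability (expect extra console output from the debug prints still in predicted_particle_distance_loss):

import numpy as np
import tensorflow as tf
from holodecml.losses import noisy_true_particle_loss, predicted_particle_distance_loss

batch_size, max_particles = 4, 3
rng = np.random.default_rng(0)

# Dummy targets and predictions with columns (x, y, z, d, prob).
y_true = rng.uniform(size=(batch_size, max_particles, 5)).astype("float32")
y_pred = rng.uniform(size=(batch_size, max_particles, 5)).astype("float32")
y_true[..., -1] = 1.0   # every true particle is marked real
y_pred[..., -1] = 0.9   # every predicted particle passes the 0.5 threshold

# Both losses reduce to a scalar. The distance-based loss matches each predicted
# "real" particle to its nearest true particle per hologram before adding the
# diameter and probability terms.
print(float(noisy_true_particle_loss(tf.constant(y_true), tf.constant(y_pred))))
print(float(predicted_particle_distance_loss(tf.constant(y_true), tf.constant(y_pred))))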