Merge pull request #8 from NCAR/ggantos
Running attention_net_validation_loss
djgagne authored Dec 9, 2020
2 parents 8e731f0 + 4ffa87f commit 89bf6e3
Showing 22 changed files with 2,542 additions and 29,779 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -4,7 +4,9 @@
*.out
*.o
*.out.*
batch**.sh
*.o*
.idea
holodecml.egg-info/
data/
models/

10 changes: 5 additions & 5 deletions config/attn.yml
@@ -1,12 +1,12 @@
path_data: "/glade/p/cisl/aiml/ai4ess_hackathon/holodec/"
path_save: "/glade/p/cisl/aiml/holodec/testing/attention/"
path_save: "/glade/p/cisl/aiml/ggantos/holodec/attn_new/argmin/"
model_name: "attn"
num_particles: "large"
random_seed: 328942
output_cols: ["x", "y", "z", "d", "hid"]
scaler_out: "MinMaxScaler"
num_z_bins: False
subset: 0.1
subset: False
mass: False
metric: "mae"
noisy_sd: 0.1
@@ -16,9 +16,9 @@ attention_network:
hidden_neurons: 100
activation: "relu"
min_filters: 16
output_num: 5
output_num: 5
train:
learning_rate: 0.001
epochs: 20
batch_size: 64
epochs: 40
batch_size: 16
verbose: 1
33 changes: 33 additions & 0 deletions config/attn_optuna.yml
@@ -0,0 +1,33 @@
path_data: "/glade/p/cisl/aiml/ai4ess_hackathon/holodec/"
path_save: "/glade/p/cisl/aiml/ggantos/holodec/optuna/test/"
model_name: "attn"
num_particles: "large"
random_seed: 328942
output_cols: ["x", "y", "z", "d", "hid"]
scaler_out: "MinMaxScaler"
num_z_bins: False
subset: 0.1
mass: False
metric: "mae"
noisy_sd: 0.1
attention_network:
activation: "relu"
output_num: 5
train:
epochs: 100
batch_size: 64
verbose: 1
callbacks:
EarlyStopping:
monitor: "val_loss"
patience: 3
ReduceLROnPlateau:
monitor: "val_loss"
factor: 0.2
patience: 1
min_lr: 0.0000001
mode: "auto"
CSVLogger:
filename: "/glade/p/cisl/aiml/ggantos/holodec/optuna/test/training.txt"
separator: " "
append: True
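
The callbacks block above mirrors the keyword arguments of the standard tf.keras.callbacks classes, so it can be built generically from the YAML. A minimal sketch, assuming a get_callbacks helper that is illustrative and not part of this PR:

import tensorflow as tf

def get_callbacks(train_conf):
    """Hypothetical helper: build Keras callback objects from the YAML 'callbacks' mapping."""
    callbacks = []
    for name, kwargs in train_conf.get("callbacks", {}).items():
        # e.g. tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3)
        callbacks.append(getattr(tf.keras.callbacks, name)(**kwargs))
    return callbacks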
58 changes: 58 additions & 0 deletions config/hyperparameter.yml
@@ -0,0 +1,58 @@
log:
save_path: "/glade/p/cisl/aiml/ggantos/holodec/optuna/test/log.txt"

slurm:
jobs: 20
kernel: "ncar_pylib ncar_20200417"
batch:
account: "NAML0001"
gres: "gpu:v100:1"
mem: "256G"
n: 8
t: "12:00:00"
J: "hyper_opt"
o: "hyper_opt.out"
e: "hyper_opt.err"

optuna:
name: "holodec_optimization.db"
reload: 0
objective: "/glade/work/ggantos/holodec-ml/scripts/ggantos/objective.py"
direction: "minimize"
metric: "val_loss"
n_trials: 20
gpu: True
save_path: "/glade/p/cisl/aiml/ggantos/holodec/optuna/test/"
sampler:
type: "TPESampler"
parameters:
attention_neurons:
type: "int"
settings:
name: "attention_neurons"
low: 50
high: 150
hidden_layers:
type: "int"
settings:
name: "hidden_layers"
low: 1
high: 4
hidden_neurons:
type: "int"
settings:
name: "hidden_neurons"
low: 50
high: 150
min_filters:
type: "int"
settings:
name: "min_filters"
low: 8
high: 24
learning_rate:
type: "float"
settings:
name: "learning_rate"
low: 0.00001
high: 0.01
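
The parameters block pairs each tunable hyperparameter with Optuna suggest settings (type, name, low, high). One plausible way the objective script referenced above could consume it; the suggest_from_config helper is illustrative, and the actual objective.py is not shown in this diff:

import optuna

def suggest_from_config(trial, parameters):
    """Illustrative mapping from the YAML 'parameters' block to Optuna trial suggestions."""
    values = {}
    for key, spec in parameters.items():
        s = spec["settings"]
        if spec["type"] == "int":
            values[key] = trial.suggest_int(s["name"], s["low"], s["high"])
        elif spec["type"] == "float":
            values[key] = trial.suggest_float(s["name"], s["low"], s["high"])
    return values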
21 changes: 21 additions & 0 deletions config/zdist_FT_radavg.yml
@@ -0,0 +1,21 @@
path_data: "/glade/p/cisl/aiml/ai4ess_hackathon/holodec/ft_rad_bidis_z/"
path_save: "/glade/p/cisl/aiml/ggantos/holodec/ft_rad_bidis_models/z/"
model_name: "cnn"
random_seed: 328942
input_variable: 'input_image'
label_variable: 'histogram'
metric: "mae"
conv2d_network:
filters: [4, 8, 16]
kernel_sizes: [5, 5, 5]
conv2d_activation: "relu"
pool_sizes: [0, 0, 0]
dense_sizes: [64, 32, 16]
dense_activation: "elu"
lr: 0.001
optimizer: "adam"
loss: "categorical_crossentropy"
batch_size: 256
metrics: ["TP","FP","TN","FN"]
epochs: 20
verbose: 1
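
The conv2d_network block lists one filter count, kernel size, and pool size per convolutional layer, followed by the dense head. A rough sketch of the kind of Keras model such a block could describe; the build_cnn function and layer wiring are assumptions, not the repo's actual implementation:

from tensorflow.keras import layers, models

def build_cnn(conf, input_shape, num_bins):
    """Rough sketch: one Conv2D (plus optional pooling) per entry, then the dense head."""
    model = models.Sequential([layers.Input(shape=input_shape)])
    for f, k, p in zip(conf["filters"], conf["kernel_sizes"], conf["pool_sizes"]):
        model.add(layers.Conv2D(f, k, activation=conf["conv2d_activation"], padding="same"))
        if p > 0:
            model.add(layers.MaxPooling2D(p))
    model.add(layers.Flatten())
    for n in conf["dense_sizes"]:
        model.add(layers.Dense(n, activation=conf["dense_activation"]))
    model.add(layers.Dense(num_bins, activation="softmax"))  # pairs with categorical_crossentropy
    return model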
53 changes: 39 additions & 14 deletions holodecml/data.py
@@ -1,10 +1,8 @@
import os
import random
import xarray as xr
import numpy as np
import pandas as pd
from datetime import datetime
import socket

import numpy as np
import xarray as xr

num_particles_dict = {
1 : '1particle',
Expand All @@ -17,6 +15,12 @@
'test' : 'test',
'valid': 'validation'}

def get_dataset_path():
if 'casper' in socket.gethostname():
return "/glade/p/cisl/aiml/ai4ess_hackathon/holodec/"
else:
return "/Users/ggantos/PycharmProjects/holodec-ml/data/"

def dataset_name(num_particles, split, file_extension='nc'):
"""
Return the dataset filename given user inputs
@@ -185,24 +189,43 @@ def calc_z_bins(train_outputs, valid_outputs, num_z_bins):
z_bins = np.linspace(z_min, z_max, num_z_bins)
return z_bins

# added this because the previous code allowed a different max_particle size
# depending on which split df was opened and the subset
def get_max_particles(path_data, num_particles, output_cols):
ds = open_dataset(path_data, num_particles, "train")
outputs = ds[output_cols].to_dataframe()
max_particles = outputs['hid'].value_counts().max()
return max_particles

# updated function to create the entire dataset template at one time to
# decrease overhead and eliminate setting random seeds
def make_template(df, num_images):
max_particles = df['hid'].value_counts().max()
size = (max_particles * num_images, 1)
def make_template(df, num_images, max_particles):
size = (num_images * max_particles, 1)
x = np.random.uniform(low=df['x'].min(), high=df['x'].max(), size=size)
y = np.random.uniform(low=df['y'].min(), high=df['y'].max(), size=size)
z = np.random.uniform(low=df['z'].min(), high=df['z'].max(), size=size)
d = np.random.uniform(low=df['d'].min(), high=df['d'].max(), size=size)
prob = np.zeros(d.shape)
template = np.hstack((x, y ,z ,d ,prob))
template = template.reshape((num_images, max_particles, -1))
return template
return template

def make_random_outputs(ds):
num_images = ds.shape[0]
max_particles = ds.shape[1]
size = (num_images * max_particles, 1)
x = np.random.uniform(low=np.min(ds[:,:,0:1]), high=np.max(ds[:,:,0:1]), size=size)
y = np.random.uniform(low=np.min(ds[:,:,1:2]), high=np.max(ds[:,:,1:2]), size=size)
z = np.random.uniform(low=np.min(ds[:,:,2:3]), high=np.max(ds[:,:,2:3]), size=size)
d = np.random.uniform(low=np.min(ds[:,:,3:4]), high=np.max(ds[:,:,3:4]), size=size)
template = np.hstack((x, y, z, d))
template = template.reshape((num_images, max_particles, -1))
return template

# cycles through dataset by "hid" to overwrite random data generated in
# make_template with actual data and classification of 1
def outputs_3d(outputs, num_images):
outputs_array = make_template(outputs, num_images)
def outputs_3d(outputs, num_images, max_particles):
outputs_array = make_template(outputs, num_images, max_particles)
for hid in outputs["hid"].unique():
outputs_hid = outputs.loc[outputs['hid'] == hid].to_numpy()
outputs_hid[:, -1] = 1
@@ -231,7 +254,6 @@ def load_scaled_datasets(path_data, num_particles, output_cols,
valid_inputs: (np array) Valid input data scaled between 0 and 1
valid_outputs: (np array) Scaled valid output data
"""

train_inputs,\
train_outputs = load_raw_datasets(path_data, num_particles, 'train',
output_cols, subset)
@@ -259,10 +281,13 @@ def load_scaled_datasets(path_data, num_particles, output_cols,
else:
if train_inputs.shape[0] != train_outputs.shape[0]:
col = [c for c in output_cols if c != 'hid']
max_particles = get_max_particles(path_data, num_particles, output_cols)
train_outputs[col] = scaler_out.fit_transform(train_outputs[col])
train_outputs = outputs_3d(train_outputs, train_inputs.shape[0])
train_outputs = outputs_3d(train_outputs, train_inputs.shape[0],
max_particles)
valid_outputs[col] = scaler_out.transform(valid_outputs[col])
valid_outputs = outputs_3d(valid_outputs, valid_inputs.shape[0])
valid_outputs = outputs_3d(valid_outputs, valid_inputs.shape[0],
max_particles)
else:
train_outputs.drop(['hid'], axis=1)
train_outputs = scaler_out.fit_transform(train_outputs)
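
Taken together, the reworked helpers give every split the same particle cap before the random template is filled in. A hypothetical call sequence (shapes follow the code above; this snippet is not part of the diff):

from holodecml.data import get_max_particles, outputs_3d

# path_data, num_particles, output_cols, train_inputs, train_outputs come from the config
# and load_raw_datasets, exactly as in load_scaled_datasets above.
max_particles = get_max_particles(path_data, num_particles, output_cols)  # same cap for every split/subset
train_outputs_3d = outputs_3d(train_outputs, train_inputs.shape[0], max_particles)
# -> array of shape (num_images, max_particles, 5): x, y, z, d, prob (prob = 1 for real particles)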
68 changes: 57 additions & 11 deletions holodecml/losses.py
@@ -1,45 +1,91 @@
import logging
import tensorflow as tf
from typing import List, Dict
import tensorflow.keras.backend as K

logger = logging.getLogger(__name__)

logger = logging.getLogger(__name__)

class SymmetricCrossEntropy:

def __init__(self, a: float = 1.0, b: float = 1.0) -> None:
self.a = a
self.b = b

def __call__(self, *args, **kwargs) -> float:
bce = tf.keras.losses.CategoricalCrossentropy()
kld = tf.keras.losses.KLDivergence()
return self.a * bce(*args, **kwargs) + self.b * kld(*args, **kwargs)


def rmse(y_true, y_pred):
return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))


def wmse(y_true, y_pred):
return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))


def R2(y_true, y_pred):
""" Is actually 1 - R2
"""
SS_res = K.sum(K.square(y_true - y_pred))
SS_res = K.sum(K.square(y_true - y_pred))
SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
return SS_res/(SS_tot + K.epsilon())
return SS_res / (SS_tot + K.epsilon())


def keras_mse(y_true, y_pred):
return K.mean(K.square(y_pred - y_true))


def attention_net_loss(y_true, y_pred):
def noisy_true_particle_loss(y_true, y_pred):
# y_true and y_pred will have shape (batch_size x max_num_particles x 5)
loss_real = tf.reduce_mean(tf.abs(y_true[y_true[:, :, -1] > 0] - y_pred[y_true[:, :, -1] > 0]))
loss_bce = binary_crossentropy(y_true[:,:,-1],
y_pred[:,:,-1])
loss_bce = tf.keras.losses.binary_crossentropy(tf.reshape(y_true[:, :, -1],[-1]),
tf.reshape(y_pred[:, :, -1],[-1]))
loss_total = loss_real + loss_bce
return loss_total

def random_particle_distance_loss(y_true, y_pred):
loss_xy = tf.zeros((), dtype=tf.float32)
loss_z = tf.zeros((), dtype=tf.float32)
loss_d = tf.zeros((), dtype=tf.float32)

for h in range(tf.shape(y_pred)[0]):
y_pred_h = y_pred[h]
print("y_pred_h.shape", y_pred_h.get_shape())
y_true_h = y_true[h]
print("y_true_h.shape", y_true_h.shape)
real_idx = tf.argmin(y_true_h[:, -1], axis=0)
if real_idx == 0:
real_idx = tf.cast(tf.shape(y_true_h)[0], dtype=tf.int64)
print("real_idx.shape", real_idx.get_shape())
y_true_h = y_true_h[:real_idx]
print("y_true_h.shape", y_true_h.get_shape())

dist_x = (y_pred_h[:, 0:1] - tf.transpose(y_true_h)[0:1, :]) ** 2
dist_y = (y_pred_h[:, 1:2] - tf.transpose(y_true_h)[1:2, :]) ** 2
dist_xy = dist_x + dist_y
print(f"dist_xy.shape: {dist_xy.shape}")
loss_xy_h = tf.math.reduce_sum(tf.math.reduce_min(dist_xy, axis=1))
loss_xy = loss_xy + loss_xy_h

# determine index of true particle closest to each predicted particle
max_idx = tf.cast(tf.math.argmin(dist_xy, axis=1), dtype=tf.int32)
max_idx_2d = tf.stack((tf.range(tf.shape(dist_xy)[0]), max_idx), axis=-1)

loss_z_h = (y_pred_h[:, 2:3] - tf.transpose(y_true_h)[2:3, :]) ** 2
loss_z_h = tf.math.reduce_sum(tf.gather_nd(loss_z_h, max_idx_2d))
loss_z = loss_z + loss_z_h

loss_d_h = (y_pred_h[:, 3:4] - tf.transpose(y_true_h)[3:4, :]) ** 2
loss_d_h = tf.math.reduce_sum(tf.gather_nd(loss_d_h, max_idx_2d))
loss_d = loss_d + loss_d_h

loss_xy = loss_xy/tf.cast(tf.shape(y_pred)[0], dtype=tf.float32)
loss_z = loss_z/tf.cast(tf.shape(y_pred)[0], dtype=tf.float32)
loss_d = loss_d/tf.cast(tf.shape(y_pred)[0], dtype=tf.float32)

valid_error = loss_xy + loss_z + loss_d
print(f"ERROR SHAPE: {valid_error.shape}")

return valid_error
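
The trickiest part of random_particle_distance_loss is the per-hologram matching: each predicted particle is paired with the true particle nearest to it in (x, y), and the z and d errors are read off at that pairing via tf.gather_nd. A toy illustration of that indexing pattern (values are made up; only the bookkeeping matters):

import tensorflow as tf

dist_xy = tf.constant([[0.5, 0.1, 0.9],
                       [0.2, 0.7, 0.3]])                    # (num_pred, num_true) squared xy distances
match = tf.cast(tf.math.argmin(dist_xy, axis=1), tf.int32)  # nearest true particle per prediction -> [1, 0]
idx = tf.stack((tf.range(tf.shape(dist_xy)[0]), match), axis=-1)
z_err = tf.constant([[1.0, 2.0, 3.0],
                     [4.0, 5.0, 6.0]])                      # squared z error vs every true particle
print(tf.gather_nd(z_err, idx).numpy())                     # -> [2. 4.]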
1 change: 1 addition & 0 deletions holodecml/ml_utils.py
@@ -37,6 +37,7 @@ def __init__(self,in_array: xr.DataArray,dim: Tuple=None):
self.max = in_array.max(dim=dim)
self.delta = self.max-self.min
self.mid = 0.5*(self.max+self.min)

def fit_transform(self,new_array):
"""
Apply rescaling to data
7 changes: 4 additions & 3 deletions holodecml/models.py
@@ -23,7 +23,8 @@
"rmse": rmse,
"weighted_mse": wmse,
"r2": R2,
"attn": attention_net_loss
"noisy": noisy_true_particle_loss,
"random": random_particle_distance_loss
}

custom_metrics = {
@@ -231,7 +232,7 @@ def __init__(self, hidden_layers=1, hidden_neurons=10, activation="relu", output
self.output_num = output_num
for i in range(self.hidden_layers):
setattr(self, f"dense_{i:02d}", Dense(self.hidden_neurons, activation=activation))
self.output_dense = Dense(self.output_num)
self.output_dense = Dense(self.output_num, activation=None)

def call(self, inputs, **kwargs):
out = inputs
@@ -366,7 +367,7 @@ def run_particleattentionnet():
particle_pos, holo = generate_gaussian_particles(num_images=num_images, num_particles=num_particles,
image_size_pixels=image_size_pixels, gaussian_sd=filter_size)
particle_pos_noisy = particle_pos * (1 + np.random.normal(0, noise_sd, particle_pos.shape))
net.compile(optimizer="adam", loss=custom_losses["attn"])
net.compile(optimizer="adam", loss=custom_losses["noisy"], metrics="noisy")
net.fit([particle_pos_noisy, holo], particle_pos, epochs=15, batch_size=32, verbose=1)
pred_particle_pos = net.predict([particle_pos_noisy, holo], batch_size=128)
import matplotlib.pyplot as plt