From 18bdb77ee1547408fc06c3208fd48bee4d719b28 Mon Sep 17 00:00:00 2001
From: kylajones
Date: Sat, 19 Oct 2024 16:27:31 -0400
Subject: [PATCH] Fix a bug in the MCMC model: scale sigma by the data mean
 and map theta2 through a sigmoid instead of a tanh

---
 .../test_no_disc_TP15_error_estimation.py     | 33 +++++---------
 ...test_no_disc_TP15_error_estimation_mcmc.py | 45 ++++++++++---------
 plot_mcmc_and_linfa.py                        |  4 +-
 run_plot_res.sh                               |  4 +-
 4 files changed, 38 insertions(+), 48 deletions(-)

diff --git a/linfa/tests/test_no_disc_TP15_error_estimation.py b/linfa/tests/test_no_disc_TP15_error_estimation.py
index 74f3ca8..5334217 100644
--- a/linfa/tests/test_no_disc_TP15_error_estimation.py
+++ b/linfa/tests/test_no_disc_TP15_error_estimation.py
@@ -15,8 +15,8 @@ def run_test():
     exp = experiment()
     exp.name = "TP15_no_disc_error_estimation"
     exp.flow_type = 'realnvp' # str: Type of flow (default 'realnvp') # TODO: generalize to work for TP1
-    exp.n_blocks = 30 # int: Number of hidden layers
-    exp.hidden_size = 100 # int: Hidden layer size for MADE in each layer (default 100)
+    exp.n_blocks = 50 # int: Number of hidden layers
+    exp.hidden_size = 10 # int: Hidden layer size for MADE in each layer (default 100)
     exp.n_hidden = 1 # int: Number of hidden layers in each MADE
     exp.activation_fn = 'relu' # str: Activation function used (default 'relu')
     exp.input_order = 'sequential' # str: Input order for create_mask (default 'sequential')
@@ -25,12 +25,12 @@ def run_test():

     # p0,e,sigma_e (measurement noise also estimated)
     exp.input_size = 3 # int: Dimensionality of input (default 2)
-    exp.batch_size = 200 # int: Number of samples generated (default 100)
-    exp.true_data_num = 2 # double: Number of true model evaluations (default 2)
-    exp.n_iter = 2000 # int: Number of iterations (default 25001)
-    exp.lr = 0.0001 # float: Learning rate (default 0.003)
+    exp.batch_size = 300 # int: Number of samples generated (default 100)
+    exp.true_data_num = 1 # double: Number of true model evaluations (default 2)
+    exp.n_iter = 4000 # int: Number of iterations (default 25001)
+    exp.lr = 0.0005 # float: Learning rate (default 0.003)
     exp.lr_decay = 0.9999 # float: Learning rate decay (default 0.9999)
-    exp.log_interval = 1 # int: How often to show loss stats (default 10)
+    exp.log_interval = 10 # int: How often to show loss stats (default 10)
     exp.run_nofas = False # normalizing flow with adaptive surrogate
     exp.surrogate_type = 'discrepancy' # type of surrogate we are using
@@ -73,7 +73,6 @@ def run_test():

     # Read data
     exp.model.data = np.loadtxt('observations.csv', delimiter = ',', skiprows = 1)
-    print(exp.model.defParams.detach().numpy()[0,2] * np.mean(exp.model.data[:,2]))

     if(len(exp.model.data.shape) < 2):
         exp.model.data = np.expand_dims(exp.model.data, axis=0)
@@ -109,29 +108,17 @@ def log_density(calib_inputs, model, surrogate, transform):
         discrepancy = surrogate.forward(model.var_in)

     # Get the calibration parameters
-    p0Const, eConst, std_dev = torch.chunk(phys_inputs, chunks = 3, dim = 1)
-
-    # Get data - (num_var x num_obs)
+    p0Const, eConst, std_dev_ratio = torch.chunk(phys_inputs, chunks = 3, dim = 1)
     Data = torch.tensor(model.data[:,2:]).to(exp.device)
     num_reapeat_obs = Data.size(1)

     # Convert standard deviation ratio to standard deviation
-    std_dev = std_dev.flatten() * torch.mean(Data)
-    # LL = np.zeros(exp.batch_size)
+    std_dev = std_dev_ratio.flatten() * torch.mean(Data)
     total_nll = 0

     # Evaluate log-likelihood:
     # Loop on the available observations
     for loopA in range(num_reapeat_obs):
-
-        # # loop over batches
-        # for loopB in range(exp.batch_size):
-
-        #     mean = modelOut[loopB].detach().numpy()
-        #     cov = std_dev.detach().numpy()[loopB]**2 * np.eye(mean.shape[0])
-        #     data = Data[:, loopA].unsqueeze(0).detach().numpy()
-
-        #     LL[loopB] = stats.multivariate_normal.logpdf(x = data, mean = mean, cov = cov)

         # -n / 2 * log ( 2 pi )
         l1 = -0.5 * np.prod(model.data.shape[0]) * np.log(2.0 * np.pi)
@@ -209,6 +196,6 @@ def generate_data(use_true_model = False, num_observations=50):

 # Main code
 if __name__ == "__main__":
-    generate_data(use_true_model = False, num_observations = 1)
+    #generate_data(use_true_model = False, num_observations = 1)
     run_test()
\ No newline at end of file
diff --git a/linfa/tests/test_no_disc_TP15_error_estimation_mcmc.py b/linfa/tests/test_no_disc_TP15_error_estimation_mcmc.py
index 696dbc8..beb492c 100644
--- a/linfa/tests/test_no_disc_TP15_error_estimation_mcmc.py
+++ b/linfa/tests/test_no_disc_TP15_error_estimation_mcmc.py
@@ -10,7 +10,6 @@

 # Import the RCR model
 from linfa.models.discrepancy_models import PhysChem_error

-
 def run_test(num_results, num_burnin_steps):

     # Set variable grid
@@ -23,22 +22,25 @@ def run_test(num_results, num_burnin_steps):

     # Read data
     model.data = np.loadtxt('observations.csv', delimiter=',', skiprows=1)

+    data_mean = np.mean(model.data[:,2:])
+
     # Form tensors for variables and results in observations
-    var_grid_in = tf.convert_to_tensor(model.data[:,:2], dtype = tf.float32)
-    var_grid_out = tf.convert_to_tensor(model.data[:,2:], dtype = tf.float32)
+    var_grid_in = tf.convert_to_tensor(model.data[:,:2], dtype=tf.float32)
+    var_grid_out = tf.convert_to_tensor(model.data[:,2:], dtype=tf.float32)

     def target_log_prob_fn(theta, log_sigma):

         # Transform log_sigma to sigma (ensuring sigma is positive)
-        sigma = tf.exp(log_sigma)
-
+        sigma = tf.exp(log_sigma) * data_mean

         # Transformations on theta
         theta1 = tf.exp(theta[0]) # theta[0] is sampled on the log scale
-        theta2 = -21E3 + 1000 * tf.tanh(theta[1]) # Allow some flexibility around -21E3
+
+        # Use a sigmoid transformation to map theta2 into (-30E3, -15E3)
+        theta2 = -30E3 + (tf.sigmoid(theta[1]) * 15E3)

         # Priors on transformed parameters
-        prior_theta1 = tfd.Normal(loc=1000.0, scale=100.0).log_prob(theta1)
-        prior_theta2 = tfd.Normal(loc=-21.0E3, scale=500.0).log_prob(theta2)
+        prior_theta1 = tfd.Normal(loc = 1000.0, scale = 100.0).log_prob(theta1)
+        prior_theta2 = tfd.Normal(loc = -21.0E3, scale = 500.0).log_prob(theta2)
         prior_theta = prior_theta1 + prior_theta2

         # Prior on sigma^2 (Beta prior as used)
@@ -59,7 +61,7 @@ def target_log_prob_fn(theta, log_sigma):
         y_pred_tf = tf.convert_to_tensor(y_pred_np, dtype=tf.float32)

         # Likelihood: y_i ~ N(g(x_i, theta), sigma^2)
-        likelihood = tfd.MultivariateNormalDiag(loc=y_pred_tf, scale_diag=sigma * tf.ones_like(y_pred_tf)).log_prob(var_grid_out)
+        likelihood = tfd.MultivariateNormalDiag(loc=y_pred_tf, scale_diag = sigma * tf.ones_like(y_pred_tf)).log_prob(var_grid_out)

         return tf.reduce_sum(likelihood) + tf.reduce_sum(prior_theta) + tf.reduce_sum(prior_sigma)
@@ -67,21 +69,22 @@
     step_size = 0.1 # Adjust step size for better exploration

     mh_kernel = tfp.mcmc.RandomWalkMetropolis(
-        target_log_prob_fn = target_log_prob_fn,
-        new_state_fn = tfp.mcmc.random_walk_normal_fn(scale = step_size))
+        target_log_prob_fn=target_log_prob_fn,
+        new_state_fn=tfp.mcmc.random_walk_normal_fn(scale=step_size)
+    )

-    initial_theta1 = tf.math.log(tf.ones([], dtype=tf.float32) * 1E3)
-    initial_theta2 = tf.zeros([], dtype=tf.float32)
+    initial_theta1 = tf.math.log(tf.ones([], dtype=tf.float32) * 1200)
+    initial_theta2 = tf.zeros([], dtype=tf.float32) # Initialize at 0 to center the sigmoid at the midpoint of the range
     initial_theta = tf.stack([initial_theta1, initial_theta2])
     initial_log_sigma = tf.math.log(tf.ones([], dtype=tf.float32) * 0.05) # Start with sigma = 0.05

     # Run MCMC sampling
     samples, kernel_results = tfp.mcmc.sample_chain(
-        num_results = num_results,
-        num_burnin_steps = num_burnin_steps,
-        current_state = [initial_theta, initial_log_sigma],
-        kernel = mh_kernel,
-        trace_fn = lambda current_state, kernel_results: kernel_results.is_accepted
+        num_results=num_results,
+        num_burnin_steps=num_burnin_steps,
+        current_state=[initial_theta, initial_log_sigma],
+        kernel=mh_kernel,
+        trace_fn=lambda current_state, kernel_results: kernel_results.is_accepted
     )

     # Unpack theta samples and transform back
@@ -90,8 +93,8 @@ def target_log_prob_fn(theta, log_sigma):

     # Transform theta1 back to original scale (it was on the log scale during sampling)
     theta1_samples = tf.exp(theta_samples[:, 0])

-    # Theta2 is already transformed using the affine transformation, so no further transformation is needed
-    theta2_samples = -21E3 + 1000 * tf.tanh(theta_samples[:, 1])
+    # Apply the same sigmoid transformation to map theta2 back to the original scale
+    theta2_samples = -30E3 + (tf.sigmoid(theta_samples[:, 1]) * 15E3)

     # Transform log_sigma samples back to sigma
     sigma_samples = tf.exp(log_sigma_samples)
@@ -185,7 +188,7 @@ def generate_data(use_true_model = False, num_observations=50):

     # generate_data(use_true_model = False, num_observations = 1)

-    samples, kernel_results = run_test(10000, 500)
+    samples, kernel_results = run_test(10000, 1000)

     save_results(samples)

diff --git a/plot_mcmc_and_linfa.py b/plot_mcmc_and_linfa.py
index 557a0d0..fe1591d 100644
--- a/plot_mcmc_and_linfa.py
+++ b/plot_mcmc_and_linfa.py
@@ -30,8 +30,8 @@ def plot_marginals(param_data, idx1, fig_format='png'):
     gt_params = [1000, -21.0E3, 0.05]

     plt.figure(figsize=(6, 6))
-    plt.hist(param_data[:, idx1], color = 'blue', alpha = 0.25, label = 'LINFA') #, density = True)
-    plt.hist(mcmc_1_data, color = 'red', alpha = 0.25, label = 'MCMC') #density = True)
+    plt.hist(param_data[:, idx1], color = 'blue', alpha = 0.25, label = 'LINFA', density = True)
+    plt.hist(mcmc_1_data, color = 'red', alpha = 0.25, label = 'MCMC', density = True)
     plt.axvline(gt_params[idx1], color = 'r')
     plt.xlabel(r'$\theta_{K,'+str(idx1+1)+'}$')
     plt.legend()
diff --git a/run_plot_res.sh b/run_plot_res.sh
index f1bb650..30365fa 100644
--- a/run_plot_res.sh
+++ b/run_plot_res.sh
@@ -1,7 +1,7 @@
-# python3 linfa/plot_res.py --folder results/ --name TP15_no_disc_error_estimation --iter 2000 --picformat png
+python3 linfa/plot_res.py --folder results/ --name TP15_no_disc_error_estimation --iter 4000 --picformat png
 # python3 linfa/plot_disc.py --folder results/ --name test_08_lf_w_disc_TP1_uniform_prior --iter 25000 --picformat png --mode histograms --num_points 10 --limfactor 1.0 --saveinterval 1000 --dropouts 10
 # python3 linfa/plot_disc.py --folder results/ --name test_19_lf_w_disc_TP15_rep_meas_dropout --iter 10000 --picformat png --mode discr_surface --num_points 10 --limfactor 1.0 --saveinterval 1000
 # python3 linfa/plot_disc.py --folder results/ --name test_08_lf_w_disc_TP1_uniform_prior --iter 25000 --picformat png --mode marginal_stats --num_points 10 --limfactor 1.0 --saveinterval 1000
 # python3 linfa/plot_disc.py --folder results/ --name TP1_no_disc_gaussian_prior --iter 10000 --picformat png --mode marginal_posterior --num_points 10 --limfactor 1.0 --saveinterval 1000
-python3 plot_mcmc_and_linfa.py --folder results/ --name TP15_no_disc_error_estimation --iter 2000 --picformat png
+python3 plot_mcmc_and_linfa.py --folder results/ --name TP15_no_disc_error_estimation --iter 4000 --picformat png