Merge pull request #43 from QSD-Group/algae

Merge 'algae' branch into 'main' branch. Many thanks to @GaYeongKim !
QSD-Group · Nov 9, 2023 · 742b96e · 742b96e
2 parents 9c5d885 + 99960f8
commit 742b96e
Show file tree

Hide file tree

Showing 49 changed files with 16,434 additions and 0 deletions.
diff --git a/exposan/pm2_batch/README.rst b/exposan/pm2_batch/README.rst
@@ -0,0 +1,35 @@
+=====================================================================================
+pm2_batch: Calibration of Phototrophic-Mixotrophic Process Model (PM2)
+=====================================================================================
+
+Summary
+-------
+This module is used to calibrate the Phototrophic-Mixotrophic Process Model (PM2; ``qsdsan.processes.PM2``) using batch experiment results. Note that the package `optuna <https://optuna.org/>`_ is needed to run the calibration script.
+
+.. figure:: ./readme_figures/pm2_batch.svg
+
+   *Layout of the system used for the calibration of PM2 implemented in QSDsan*
+
+- ``system.py`` constructs the batch system above.
+- ``model.py`` creates a model for uncertainty and sensitivity analysis.
+- ``analyses.py`` runs uncertainty and sensitivity analysis and plots the results.
+- ``calibration.py`` calibrates kinetic model parameters.
+- ``_batch_may_unit_cali.py`` is used to check the calibration results.
+
+
+Load the system
+---------------
+.. code-block:: python
+
+   >>> from exposan import pm2_batch
+   >>> pm2_batch.load()
+   >>> sys = pm2_batch.sys
+   >>> sys.simulate(t_span=(0,1), method='RK23')
+   >>> PBR = pm2_batch.PBR
+   >>> fig, axis = PBR.scope.plot_time_series(('S_P'))
+   >>> fig
+
+.. figure:: ./readme_figures/pm2_batch_phosphorus.png
+
+    *Phosphorus removal in the batch reactor setting*
+
diff --git a/exposan/pm2_batch/__init__.py b/exposan/pm2_batch/__init__.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+'''
+EXPOsan: Exposition of sanitation and resource recovery systems
+
+This module is developed by:
+    Ga-Yeong Kim <gayeong1225@gmail.com>
+    Joy Zhang <joycheung1994@gmail.com>
+
+This module is under the University of Illinois/NCSA Open Source License.
+Please refer to https://github.com/QSD-Group/EXPOsan/blob/main/LICENSE.txt
+for license details.
+'''
+
+import os
+from exposan.utils import _init_modules
+pm2_path = os.path.dirname(__file__)  # directory that contains this package
+module = os.path.split(pm2_path)[-1]  # last path component, i.e., the package folder name
+data_path, results_path, figures_path = \
+    _init_modules(module, include_data_path=True, include_figures_path=True)  # helper from `exposan.utils`; presumably sets up and returns the three folders -- confirm there
+del os  # remove `os` from the module namespace once the paths are set
+
+from . import system
+
+from .system import *
+
+_system_loaded = False
+def load(reload=False, pm2_kwargs={}, init_conds={}):
+
+    global _system_loaded
+    if not _system_loaded: reload = True
+    if reload:
+        global cmps, components, pm2, sys
+        sys = create_system(
+            pm2_kwargs=pm2_kwargs,
+            init_conds=init_conds,
+            )
+        PBR = sys.flowsheet.unit.PBR
+        cmps = components = PBR.components
+        pm2 = PBR.model
+    dct = globals()
+    dct.update(sys.flowsheet.to_dict())
+    _system_loaded = True
+
+def __getattr__(name):
+    if not _system_loaded:
+        raise AttributeError(f'module "{__name__}" not yet loaded, '
+                             f'load module with `{__name__}.load()`.')
+
+from . import model
+from .model import *
+
+__all__ = (  # public names: the package paths plus everything exported by `system` and `model`
+    'pm2_path',
+    'data_path',
+    'results_path',
+    'figures_path',
+    *system.__all__,
+    *model.__all__,
+	)
diff --git a/exposan/pm2_batch/_batch_may_unit_cali.py b/exposan/pm2_batch/_batch_may_unit_cali.py
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+'''
+EXPOsan: Exposition of sanitation and resource recovery systems
+
+This module is developed by:
+    Ga-Yeong Kim <gayeong1225@gmail.com>
+    Joy Zhang <joycheung1994@gmail.com>
+
+This module is under the University of Illinois/NCSA Open Source License.
+Please refer to https://github.com/QSD-Group/EXPOsan/blob/main/LICENSE.txt
+for license details.
+'''
+
+import numpy as np
+
+import qsdsan.processes as pc, qsdsan.sanunits as su
+from qsdsan import System
+from qsdsan.utils import ospath, time_printer, \
+    ExogenousDynamicVariable as EDV
+
+cmps = pc.create_pm2_cmps()  # not referenced again in this script; presumably registers the components needed by PM2 as a side effect -- confirm
+
+pm2 = pc.PM2(arr_e=6842, K_P=1.0, f_CH_max=0.819, exponent=4, q_CH=1.92792246509906, q_LI=26.1535941900048, V_NH=0.150722549179019, V_P=0.540050768528713,  # NOTE(review): the full-precision values look like calibration output -- confirm
+              a_c=0.049, I_n=1500, arr_a=1.8e10, beta_1=2.90,
+              beta_2=3.50, b_reactor=0.03, I_opt=2000, k_gamma=1e-5,
+              K_N=0.1, K_A=6.3, K_F=6.3, rho=1.186, K_STO=1.566,
+              f_LI_max=3.249, m_ATP=10,
+              mu_max=1.969, Q_N_max=0.417, Q_N_min=0.082, Q_P_max=0.092, Q_P_min=0.0163,
+              V_NO=0.003, n_dark=0.7,
+              Y_ATP_PHO=55.073, Y_CH_PHO=0.754, Y_LI_PHO=0.901, Y_X_ALG_PHO=0.450,
+              Y_ATP_HET_ACE=39.623, Y_CH_NR_HET_ACE=0.625, Y_CH_ND_HET_ACE=0.600,
+              Y_LI_NR_HET_ACE=1.105, Y_LI_ND_HET_ACE=0.713, Y_X_ALG_HET_ACE=0.216,
+              Y_ATP_HET_GLU=58.114, Y_CH_NR_HET_GLU=0.917, Y_CH_ND_HET_GLU=0.880,
+              Y_LI_NR_HET_GLU=1.620, Y_LI_ND_HET_GLU=1.046, Y_X_ALG_HET_GLU=0.317)   # sequential calibration, seed333
+
+pm2_path = ospath.dirname(__file__)  # directory of this script
+data_path = ospath.join(pm2_path, 'data/exo_vars_batch_may_unit.xlsx')  # spreadsheet with the exogenous variables, resolved relative to this script
+
+T, I = EDV.batch_init(data_path, 'linear')  # two exogenous dynamic variables built from the spreadsheet; 'linear' is presumably the interpolation mode -- confirm
+
+PBR = su.BatchExperiment('PBR', model=pm2, exogenous_vars=(T, I))  # batch reactor governed by the PM2 model defined above
+
+init_concs = {  # initial concentrations by component ID, applied to the reactor below
+    'X_CHL':2.81,
+    'X_ALG':561.57,
+    'X_CH':13.74,
+    'X_LI':62.22,
+    'S_CO2':30.0,
+    'S_A':5.0,
+    'S_F':5.0,
+    'S_O2':20.36,
+    'S_NH':25,
+    'S_NO':9.30,
+    'S_P':0.383,
+    'X_N_ALG':3.62,
+    'X_P_ALG':12.60,
+    }
+
+PBR.set_init_conc(**init_concs)
+
+sys = System('sys', path=(PBR,))  # single-unit system; note the name `sys` is the system, not the stdlib module
+sys.set_dynamic_tracker(PBR)  # track the reactor during dynamic simulation
+
+@time_printer
+def run(t, t_step, method=None, print_t=False, **kwargs):
+    if method:
+        sys.simulate(state_reset_hook='reset_cache',
+                      t_span=(0,t),
+                      t_eval=np.arange(0, t+t_step, t_step),
+                      method=method,
+                      # rtol=1e-2,
+                      # atol=1e-3,
+                      export_state_to=f'results/sol_{t}d_{method}_batch_may_unit_cali_optuna_sequential_cali.xlsx',
+                      print_t=print_t,
+                      **kwargs)
+    else:
+        sys.simulate(state_reset_hook='reset_cache',
+                      solver='odeint',
+                      t=np.arange(0, t+t_step/30, t_step/30),
+                      # export_state_to=f'results/sol_{t}d_odeint.xlsx',
+                      print_msg=True,
+                      print_t=print_t,
+                      **kwargs)
+
+if __name__ == '__main__':
+    t = 0.25  # end of the simulated time span (printed below with a "d" suffix)
+    t_step = 0.01  # spacing of the evaluation time points passed to `run`
+    # method = 'RK45'
+    # method = 'RK23'  # original
+    # method = 'DOP853'
+    # method = 'Radau'
+    method = 'BDF'  # method currently in use; the alternatives are kept commented out around it
+    # method = 'LSODA'
+    # method = None
+    msg = f'Method {method}'
+    print(f'\n{msg}\n{"-"*len(msg)}') # underline the message with dashes of the same length
+    print(f'Time span 0-{t}d \n')
+    run(t, t_step, method=method,
+        print_t = True,
+        )
diff --git a/exposan/pm2_batch/analyses.py b/exposan/pm2_batch/analyses.py
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+'''
+EXPOsan: Exposition of sanitation and resource recovery systems
+
+This module is developed by:
+    Ga-Yeong Kim <gayeong1225@gmail.com>
+    Joy Zhang <joycheung1994@gmail.com>
+
+This module is under the University of Illinois/NCSA Open Source License.
+Please refer to https://github.com/QSD-Group/EXPOsan/blob/main/LICENSE.txt
+for license details.
+'''
+from time import time
+from qsdsan.utils import ospath, load_data
+from qsdsan.stats import get_correlations, plot_correlations
+from exposan.pm2_batch import (
+    results_path,
+    figures_path,
+    create_model,
+    run_uncertainty
+    )
+from biosteam.evaluation._utils import var_indices
+from math import ceil
+
+import os, numpy as np, pandas as pd, matplotlib as mpl, seaborn as sns, \
+    matplotlib.pyplot as plt
+    # matplotlib.ticker as tk
+
+mpl.rcParams['font.sans-serif'] = 'arial'  # global matplotlib settings applied to every figure made in this module
+mpl.rcParams["figure.autolayout"] = True
+mpl.rcParams['xtick.minor.visible'] = True
+
+# N = 1000        # takes 3.5 min
+# N = 2000        # takes 6.5 min
+# N = 3000        # takes 8 min
+# N = 5000        # takes 14 min
+# N = 10000       # takes 23.5 min
+# N = 15000       # takes 33 min
+N = 20000       # takes 43.5 min; default `N` of `run_UA_SA`, passed on to `run_uncertainty` (presumably the sample size -- confirm)
+
+T = 0.25    # T for include; default `T` of `run_UA_SA`
+# T = 7       # T for exclude
+t_step = 0.01  # default `t_step` of `run_UA_SA`
+
+# rmse_thresholds = [25, 25, 25]
+nrmse_thresholds = [None, 0.1, 0.1]  # not referenced by the functions visible in this file
+
+kind='include'  # default `kind` passed to `create_model`
+# kind='exclude'
+
+analysis='uasa'  # default `analysis` passed to `create_model`
+# analysis='cali'
+
+#%%
+def seed_RGT():
+    files = os.listdir(results_path)
+    seeds = [int(file_name[-3:]) for file_name in files if file_name.startswith('time_series_data')]
+    seed = int(str(time())[-3:])
+    if len(set(seeds)) >= 1000:
+        raise RuntimeError('The program has run out of 3-digit seeds to use. Consider'
+                           'clean up the results folder.')
+    while seed in seeds:
+        seed = (seed+1) % 1000
+    return seed
+
+#%%
+def run_UA_SA(seed=None, N=N, T=T, t_step=t_step, thresholds=[], kind=kind, analysis=analysis, plot=False):
+    seed = seed or seed_RGT()
+    mdl = create_model(kind=kind, analysis=analysis)
+    mdl = run_uncertainty(mdl, N, T, t_step, seed=seed)
+    thresholds = update_thresholds(mdl, thresholds)
+    D, p = get_correlations(mdl, kind='KS', thresholds=thresholds,
+                            file=ospath.join(results_path, f'KS_test_{seed}.xlsx')
+                            )
+    if plot:
+        plot_cdf_by_group(mdl, thresholds=thresholds)
+    fig, ax = plot_correlations(D, close_fig=False,
+                                file=ospath.join(figures_path, 'KS_test_D.png'))
+    return mdl
+
+def update_thresholds(mdl, thresholds, metrics=None, quantile=0.25):
+    metrics = metrics or mdl.metrics
+    thresholds = thresholds or [None]*len(metrics)
+    data = mdl.table[var_indices(metrics)]
+    for i, col in enumerate(data):
+        if thresholds[i] is None:
+            thresholds[i] = data[col].quantile(quantile)
+    return thresholds
+
+def plot_cdf_by_group(mdl=None, seed=None, thresholds=None, parameters=None, metrics=None, kind=kind, analysis=analysis):
+    if mdl is None:
+        # global mdl
+        mdl = create_model(kind=kind, analysis=analysis)
+        mdl.table = load_data(ospath.join(results_path, f'table_{seed}.xlsx'), header=[0, 1])
+    metrics = metrics or mdl.metrics
+    parameters = parameters or mdl.parameters
+    thresholds = thresholds or update_thresholds(mdl, thresholds, metrics)
+    x_df = mdl.table[var_indices(parameters)]
+    y_df = mdl.table[var_indices(metrics)]
+    ncol = 4
+    nrow = ceil(x_df.shape[1]/ncol)
+    for m, t in zip(y_df.items(), thresholds):
+        y, err = m
+        group = err <= t
+        fig, axes = plt.subplots(nrow, ncol, #sharey=True,
+                                 figsize=(ncol*4, nrow*4),
+                                 layout='constrained')
+        for col, ax in zip(x_df, axes.ravel()):
+            sns.kdeplot(data=x_df[col][group], ax=ax,
+                        cumulative=False, common_norm=True,
+                        # cumulative=True,
+                        label=f'{y[-1]} <= {round(t,2)}')
+            sns.kdeplot(data=x_df[col][1-group], ax=ax,
+                        cumulative=False, common_norm=True,
+                        # cumulative=True,
+                        label=f'{y[-1]} > {round(t,2)}')
+            ax.tick_params(axis='both', which='both', direction='inout')
+            ax.legend()
+            ax.set_xlabel(col[-1])
+            ax.set_ylabel('density')
+        # fig.subplots_adjust(hspace=0.4, wspace=0.05, bottom=0.2)
+        fig.savefig(ospath.join(figures_path, f'pdf_{y[-1]}.png'),
+                    dpi=300, facecolor='white')
+        del fig, axes
+
+def KS_test_var_thresholds(mdl=None, seed=None, kind=kind, analysis=analysis):
+    if mdl is None:
+        mdl = create_model(kind=kind, analysis=analysis)
+        mdl.table = load_data(ospath.join(results_path, f'table_{seed}.xlsx'),
+                              header=[0,1])
+    sig = []
+    thresholds = []
+    quantiles = np.linspace(0.05, 0.5, 10)
+    for q in quantiles:
+        thrs = update_thresholds(mdl, [], quantile=q)
+        D, p = mdl.kolmogorov_smirnov_d(thresholds=thrs)
+        thresholds.append(thrs)
+        sig.append(p < 0.05)
+    out = {m: pd.DataFrame() for m in var_indices(mdl.metrics)}
+    thresholds = np.asarray(thresholds).T
+
+    for s, q in zip(sig, quantiles):
+        for m, df in out.items():
+            df[q] = s[m]
+    with pd.ExcelWriter(ospath.join(results_path, f'sig_params_{seed}.xlsx')) as writer:
+        for m, thrs in zip(out, thresholds):
+            df = out[m]
+            df.index = df.index.droplevel()
+            df.columns = pd.MultiIndex.from_tuples([(col, t) for col,t \
+                                                    in zip(df.columns, thrs)],
+                                                   names=['quantile', 'NRMSE'])
+            df.to_excel(writer, sheet_name=m[-1])
+
+#%%
+if __name__ == '__main__':
+
+    # seed = 201          # include, N=1000
+    # seed = 502          # include, N=2000
+    # seed = 503          # include, N=3000
+    # seed = 505          # include, N=5000
+    # seed = 400          # include, N=10000
+    # seed = 206          # include, N=15000
+    seed = 210          # include, N=20000
+
+    mdl = run_UA_SA(seed=seed)  # uncertainty analysis plus KS test; returns the evaluated model
+    plot_cdf_by_group(seed=seed)  # no `mdl` passed, so the table is reloaded from table_<seed>.xlsx in `results_path` (presumably written by `run_uncertainty` -- confirm)
+    KS_test_var_thresholds(mdl=mdl,seed=seed)  # reuses the in-memory model from above