Skip to content

Commit

Permalink
Merge pull request #43 from QSD-Group/algae
Browse files Browse the repository at this point in the history
Merge 'algae' branch into 'main' branch. Many thanks to @GaYeongKim !
  • Loading branch information
yalinli2 authored Nov 9, 2023
2 parents 9c5d885 + 99960f8 commit 742b96e
Show file tree
Hide file tree
Showing 49 changed files with 16,434 additions and 0 deletions.
35 changes: 35 additions & 0 deletions exposan/pm2_batch/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
=====================================================================================
pm2_batch: Calibration of Phototrophic-Mixotrophic Process Model (PM2)
=====================================================================================

Summary
-------
This module is used to calibrate the Phototrophic-Mixotrophic Process Model (PM2; ``qsdsan.processes.PM2``) using batch experiment results. Note that the package `optuna <https://optuna.org/>`_ is needed to run the calibration script.

.. figure:: ./readme_figures/pm2_batch.svg

*Layout of the system used for the calibration of PM2 implemented in QSDsan*

- ``system.py`` constructs the batch system above.
- ``model.py`` creates a model for uncertainty and sensitivity analysis.
- ``analyses.py`` runs uncertainty and sensitivity analysis and plots the results.
- ``calibration.py`` calibrates kinetic model parameters.
- ``_batch_may_unit_cali.py`` is used to check the calibration results.


Load the system
---------------
.. code-block:: python

    >>> from exposan import pm2_batch
    >>> pm2_batch.load()
    >>> sys = pm2_batch.sys
    >>> sys.simulate(t_span=(0,1), method='RK23')
    >>> PBR = pm2_batch.PBR
    >>> fig, axis = PBR.scope.plot_time_series(('S_P'))
    >>> fig

.. figure:: ./readme_figures/pm2_batch_phosphorus.png

*Phosphorus removal in the batch reactor setting*

59 changes: 59 additions & 0 deletions exposan/pm2_batch/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
'''
EXPOsan: Exposition of sanitation and resource recovery systems
This module is developed by:
Ga-Yeong Kim <gayeong1225@gmail.com>
Joy Zhang <joycheung1994@gmail.com>
This module is under the University of Illinois/NCSA Open Source License.
Please refer to https://github.com/QSD-Group/EXPOsan/blob/main/LICENSE.txt
for license details.
'''

import os
from exposan.utils import _init_modules
# Directory that contains this package's ``__init__.py``.
pm2_path = os.path.dirname(__file__)
# Last path component of that directory, i.e., the package folder name.
module = os.path.split(pm2_path)[-1]
# ``_init_modules`` returns the data, results, and figures paths for this module
# (presumably creating the folders when missing -- TODO confirm in exposan.utils).
data_path, results_path, figures_path = \
_init_modules(module, include_data_path=True, include_figures_path=True)
# Drop ``os`` from the package namespace once the paths have been resolved.
del os

from . import system

from .system import *

_system_loaded = False
def load(reload=False, pm2_kwargs=None, init_conds=None):
    '''
    Build the batch system and publish its objects as attributes of this module.

    The system is always built on the first call; later calls only rebuild it
    when `reload` is True.

    Parameters
    ----------
    reload : bool
        Whether to re-create the system even if it has already been loaded.
    pm2_kwargs : dict, optional
        Forwarded to ``create_system`` as ``pm2_kwargs``.
        An empty dict is used when omitted.
    init_conds : dict, optional
        Forwarded to ``create_system`` as ``init_conds``.
        An empty dict is used when omitted.
    '''
    global _system_loaded, cmps, components, pm2, sys
    if not _system_loaded: reload = True
    if reload:
        # Fresh dicts per call: a mutable default would be shared between calls.
        sys = create_system(
            pm2_kwargs={} if pm2_kwargs is None else pm2_kwargs,
            init_conds={} if init_conds is None else init_conds,
            )
        PBR = sys.flowsheet.unit.PBR
        cmps = components = PBR.components
        pm2 = PBR.model
        # Expose every object registered in the flowsheet (e.g., ``PBR``)
        # as a module-level name.
        globals().update(sys.flowsheet.to_dict())
    _system_loaded = True

def __getattr__(name):
    '''
    Module-level attribute hook, consulted only when normal lookup fails.

    Raises
    ------
    AttributeError
        With a hint to call ``load()`` when the system has not been loaded yet,
        or with a plain "no attribute" message once it has been loaded.
    '''
    if not _system_loaded:
        raise AttributeError(f'module "{__name__}" not yet loaded, '
                             f'load module with `{__name__}.load()`.')
    # Without this, the function would fall through and return None, making
    # every misspelled attribute look like it exists with the value None.
    raise AttributeError(f'module "{__name__}" has no attribute "{name}"')

from . import model
from .model import *

# Public names of the package: the path constants defined above plus
# everything the ``system`` and ``model`` submodules declare in their own ``__all__``.
__all__ = (
'pm2_path',
'data_path',
'results_path',
'figures_path',
*system.__all__,
*model.__all__,
)
100 changes: 100 additions & 0 deletions exposan/pm2_batch/_batch_may_unit_cali.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# -*- coding: utf-8 -*-
'''
EXPOsan: Exposition of sanitation and resource recovery systems
This module is developed by:
Ga-Yeong Kim <gayeong1225@gmail.com>
Joy Zhang <joycheung1994@gmail.com>
This module is under the University of Illinois/NCSA Open Source License.
Please refer to https://github.com/QSD-Group/EXPOsan/blob/main/LICENSE.txt
for license details.
'''

import numpy as np

import qsdsan.processes as pc, qsdsan.sanunits as su
from qsdsan import System
from qsdsan.utils import ospath, time_printer, \
ExogenousDynamicVariable as EDV

# Component set for the PM2 model, created with QSDsan's helper.
cmps = pc.create_pm2_cmps()

# PM2 process model with every parameter given explicitly; per the trailing note,
# the values come from a sequential calibration run (seed 333).
pm2 = pc.PM2(arr_e=6842, K_P=1.0, f_CH_max=0.819, exponent=4, q_CH=1.92792246509906, q_LI=26.1535941900048, V_NH=0.150722549179019, V_P=0.540050768528713,
a_c=0.049, I_n=1500, arr_a=1.8e10, beta_1=2.90,
beta_2=3.50, b_reactor=0.03, I_opt=2000, k_gamma=1e-5,
K_N=0.1, K_A=6.3, K_F=6.3, rho=1.186, K_STO=1.566,
f_LI_max=3.249, m_ATP=10,
mu_max=1.969, Q_N_max=0.417, Q_N_min=0.082, Q_P_max=0.092, Q_P_min=0.0163,
V_NO=0.003, n_dark=0.7,
Y_ATP_PHO=55.073, Y_CH_PHO=0.754, Y_LI_PHO=0.901, Y_X_ALG_PHO=0.450,
Y_ATP_HET_ACE=39.623, Y_CH_NR_HET_ACE=0.625, Y_CH_ND_HET_ACE=0.600,
Y_LI_NR_HET_ACE=1.105, Y_LI_ND_HET_ACE=0.713, Y_X_ALG_HET_ACE=0.216,
Y_ATP_HET_GLU=58.114, Y_CH_NR_HET_GLU=0.917, Y_CH_ND_HET_GLU=0.880,
Y_LI_NR_HET_GLU=1.620, Y_LI_ND_HET_GLU=1.046, Y_X_ALG_HET_GLU=0.317) # sequential calibration, seed333

# Spreadsheet with the exogenous variables, located relative to this file.
pm2_path = ospath.dirname(__file__)
data_path = ospath.join(pm2_path, 'data/exo_vars_batch_may_unit.xlsx')

# Two exogenous dynamic variables read from the spreadsheet.
# NOTE(review): `T` and `I` are presumably temperature and light irradiance, and
# 'linear' presumably selects the interpolation scheme -- confirm against
# `ExogenousDynamicVariable.batch_init`.
T, I = EDV.batch_init(data_path, 'linear')

# Batch reactor unit driven by the PM2 model and the exogenous variables above.
PBR = su.BatchExperiment('PBR', model=pm2, exogenous_vars=(T, I))

# Initial concentrations, keyed by component ID, handed to the reactor below.
# NOTE(review): units are not stated in this file -- confirm against the PM2 component set.
init_concs = {
'X_CHL':2.81,
'X_ALG':561.57,
'X_CH':13.74,
'X_LI':62.22,
'S_CO2':30.0,
'S_A':5.0,
'S_F':5.0,
'S_O2':20.36,
'S_NH':25,
'S_NO':9.30,
'S_P':0.383,
'X_N_ALG':3.62,
'X_P_ALG':12.60,
}

PBR.set_init_conc(**init_concs)

# Single-unit system; the reactor is registered with the system's dynamic tracker.
sys = System('sys', path=(PBR,))
sys.set_dynamic_tracker(PBR)

@time_printer
def run(t, t_step, method=None, print_t=False, **kwargs):
    '''
    Simulate the module-level system `sys` from time 0 to `t`.

    Parameters
    ----------
    t : float
        End of the simulated time span.
    t_step : float
        Spacing of the time points at which the state is evaluated.
    method : str, optional
        Integration method forwarded to ``sys.simulate``. When given, the state
        is also exported to an Excel file under ``results/``. When omitted
        (or otherwise falsy), the ``odeint`` solver is used on a grid 30 times
        finer than `t_step` and nothing is exported.
    print_t : bool
        Forwarded to ``sys.simulate``.
    **kwargs
        Any additional keyword arguments for ``sys.simulate``.
    '''
    if not method:
        sys.simulate(
            state_reset_hook='reset_cache',
            solver='odeint',
            t=np.arange(0, t+t_step/30, t_step/30),
            # export_state_to=f'results/sol_{t}d_odeint.xlsx',
            print_msg=True,
            print_t=print_t,
            **kwargs,
            )
        return

    eval_times = np.arange(0, t+t_step, t_step)
    export_file = f'results/sol_{t}d_{method}_batch_may_unit_cali_optuna_sequential_cali.xlsx'
    sys.simulate(
        state_reset_hook='reset_cache',
        t_span=(0,t),
        t_eval=eval_times,
        method=method,
        # rtol=1e-2,
        # atol=1e-3,
        export_state_to=export_file,
        print_t=print_t,
        **kwargs,
        )

if __name__ == '__main__':
    # Simulated time span and output step.
    t, t_step = 0.25, 0.01

    # Solver selection; the alternatives that were tried are kept for reference:
    #   'RK45', 'RK23' (original), 'DOP853', 'Radau', 'LSODA', None
    method = 'BDF'

    # Banner: the method name underlined with dashes of the same length.
    msg = f'Method {method}'
    print(f'\n{msg}\n{"-"*len(msg)}') # long live OCD!
    print(f'Time span 0-{t}d \n')

    run(t, t_step, method=method, print_t=True)
167 changes: 167 additions & 0 deletions exposan/pm2_batch/analyses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
'''
EXPOsan: Exposition of sanitation and resource recovery systems
This module is developed by:
Ga-Yeong Kim <gayeong1225@gmail.com>
Joy Zhang <joycheung1994@gmail.com>
This module is under the University of Illinois/NCSA Open Source License.
Please refer to https://github.com/QSD-Group/EXPOsan/blob/main/LICENSE.txt
for license details.
'''
from time import time
from qsdsan.utils import ospath, load_data
from qsdsan.stats import get_correlations, plot_correlations
from exposan.pm2_batch import (
results_path,
figures_path,
create_model,
run_uncertainty
)
from biosteam.evaluation._utils import var_indices
from math import ceil

import os, numpy as np, pandas as pd, matplotlib as mpl, seaborn as sns, \
matplotlib.pyplot as plt
# matplotlib.ticker as tk

# Global matplotlib styling, applied as soon as this module is imported.
mpl.rcParams['font.sans-serif'] = 'arial'
mpl.rcParams["figure.autolayout"] = True
mpl.rcParams['xtick.minor.visible'] = True

# Default sample size for `run_UA_SA`, with the author's recorded run times
# for the alternatives that were tried.
# N = 1000 # takes 3.5 min
# N = 2000 # takes 6.5 min
# N = 3000 # takes 8 min
# N = 5000 # takes 14 min
# N = 10000 # takes 23.5 min
# N = 15000 # takes 33 min
N = 20000 # takes 43.5 min

# `T` and `t_step` are the defaults handed to `run_uncertainty`
# (presumably simulation duration and time step -- TODO confirm units).
T = 0.25 # T for include
# T = 7 # T for exclude
t_step = 0.01

# NOTE(review): `nrmse_thresholds` is not referenced by any function visible in
# this file; `run_UA_SA` uses its own `thresholds` default instead.
# rmse_thresholds = [25, 25, 25]
nrmse_thresholds = [None, 0.1, 0.1]

# Defaults passed to `create_model(kind=..., analysis=...)`.
kind='include'
# kind='exclude'

analysis='uasa'
# analysis='cali'

#%%
def seed_RGT(path=None):
    '''
    Pick a seed in the range 0-999 that is not yet used in the results folder.

    A seed counts as used when an entry of the folder starts with
    ``time_series_data`` and its name (extension ignored) ends with digits;
    the last three of those digits are taken as the seed.

    Parameters
    ----------
    path : str, optional
        Folder to scan. Defaults to the module-level `results_path`.

    Returns
    -------
    int
        An unused seed between 0 and 999.

    Raises
    ------
    RuntimeError
        If all 1000 seeds are already taken.
    '''
    folder = results_path if path is None else path
    used = set()
    for file_name in os.listdir(folder):
        if not file_name.startswith('time_series_data'):
            continue
        # Only the trailing digits are parsed, so short seeds
        # (e.g., ``time_series_data_5``) and names with an extension
        # no longer break the integer conversion.
        stem = os.path.splitext(file_name)[0]
        digits = stem[len(stem.rstrip('0123456789')):]
        if digits:
            used.add(int(digits[-3:]))
    if len(used) >= 1000:
        raise RuntimeError('The program has run out of 3-digit seeds to use. '
                           'Consider cleaning up the results folder.')
    # Clock-based starting point; arithmetic instead of string slicing because
    # the last three characters of ``str(time())`` may contain the decimal point.
    seed = int(time()*1e6) % 1000
    while seed in used:
        seed = (seed+1) % 1000
    return seed

#%%
def run_UA_SA(seed=None, N=N, T=T, t_step=t_step, thresholds=None, kind=kind, analysis=analysis, plot=False):
    '''
    Run the uncertainty analysis followed by a Kolmogorov-Smirnov test.

    Parameters
    ----------
    seed : int, optional
        Seed passed to `run_uncertainty` and used in the output file name.
        An unused 3-digit seed is picked with `seed_RGT` when omitted.
        A seed of 0 is honored as given.
    N, T, t_step
        Forwarded to `run_uncertainty`.
    thresholds : list, optional
        One threshold per metric; missing or None entries are filled in
        by `update_thresholds`.
    kind, analysis
        Forwarded to `create_model`.
    plot : bool
        Whether to also save the per-group density plots and the KS plot.

    Returns
    -------
    The evaluated model.
    '''
    if seed is None:
        seed = seed_RGT()
    mdl = create_model(kind=kind, analysis=analysis)
    mdl = run_uncertainty(mdl, N, T, t_step, seed=seed)
    thresholds = update_thresholds(mdl, thresholds)
    D, _ = get_correlations(mdl, kind='KS', thresholds=thresholds,
                            file=ospath.join(results_path, f'KS_test_{seed}.xlsx'))
    if plot:
        # NOTE(review): both plotting calls are assumed to be gated by `plot`;
        # confirm against the original indentation.
        plot_cdf_by_group(mdl, thresholds=thresholds)
        plot_correlations(D, close_fig=False,
                          file=ospath.join(figures_path, 'KS_test_D.png'))
    return mdl

def update_thresholds(mdl, thresholds, metrics=None, quantile=0.25):
    '''
    Return one threshold per metric, filling the gaps from the model results.

    Parameters
    ----------
    mdl
        Model whose ``table`` holds the evaluated metric columns.
    thresholds : sequence or None
        Thresholds in metric order. An empty or None value means "all unset";
        individual None entries are replaced as described below.
    metrics : optional
        Metrics to consider; defaults to ``mdl.metrics``.
    quantile : float
        Quantile of the metric's column used for each unset threshold.

    Returns
    -------
    list
        A new list; the `thresholds` argument itself is left unmodified so that
        a shared list keeps its None placeholders between calls.
    '''
    metrics = metrics or mdl.metrics
    filled = list(thresholds) if thresholds else [None]*len(metrics)
    data = mdl.table[var_indices(metrics)]
    for i, col in enumerate(data):
        if filled[i] is None:
            filled[i] = data[col].quantile(quantile)
    return filled

def plot_cdf_by_group(mdl=None, seed=None, thresholds=None, parameters=None, metrics=None, kind=kind, analysis=analysis):
    '''
    Plot, for each metric, the distribution of every parameter split by
    whether the metric is at or below its threshold.

    With the current settings (``cumulative=False``) the curves are kernel
    density estimates; one figure per metric is saved in `figures_path`
    as ``pdf_<metric name>.png``.

    Parameters
    ----------
    mdl : optional
        Evaluated model. When omitted, a model is created and its table is
        loaded from ``table_<seed>.xlsx`` in `results_path`.
    seed : optional
        Only used to locate the table file when `mdl` is omitted.
    thresholds : list, optional
        One threshold per metric; computed with `update_thresholds` when omitted.
    parameters, metrics : optional
        Subsets to plot; default to all parameters/metrics of the model.
    kind, analysis
        Forwarded to `create_model` when `mdl` is omitted.
    '''
    if mdl is None:
        mdl = create_model(kind=kind, analysis=analysis)
        mdl.table = load_data(ospath.join(results_path, f'table_{seed}.xlsx'), header=[0, 1])
    metrics = metrics or mdl.metrics
    parameters = parameters or mdl.parameters
    thresholds = thresholds or update_thresholds(mdl, thresholds, metrics)
    x_df = mdl.table[var_indices(parameters)]
    y_df = mdl.table[var_indices(metrics)]
    ncol = 4
    nrow = ceil(x_df.shape[1]/ncol)
    for (y, err), t in zip(y_df.items(), thresholds):
        # Boolean masks for the two groups. The second group must be a boolean
        # mask as well: ``1-group`` yields integers 0/1, which pandas treats as
        # row labels rather than as a selection of the complementary samples.
        below = err <= t
        above = err > t
        fig, axes = plt.subplots(nrow, ncol, #sharey=True,
                                 figsize=(ncol*4, nrow*4),
                                 layout='constrained')
        for col, ax in zip(x_df, axes.ravel()):
            sns.kdeplot(data=x_df[col][below], ax=ax,
                        cumulative=False, common_norm=True,
                        # cumulative=True,
                        label=f'{y[-1]} <= {round(t,2)}')
            sns.kdeplot(data=x_df[col][above], ax=ax,
                        cumulative=False, common_norm=True,
                        # cumulative=True,
                        label=f'{y[-1]} > {round(t,2)}')
            ax.tick_params(axis='both', which='both', direction='inout')
            ax.legend()
            ax.set_xlabel(col[-1])
            ax.set_ylabel('density')
        # fig.subplots_adjust(hspace=0.4, wspace=0.05, bottom=0.2)
        fig.savefig(ospath.join(figures_path, f'pdf_{y[-1]}.png'),
                    dpi=300, facecolor='white')
        # pyplot keeps its own reference to every figure, so deleting the local
        # names is not enough to release the memory.
        plt.close(fig)

def KS_test_var_thresholds(mdl=None, seed=None, kind=kind, analysis=analysis):
    '''
    Repeat the Kolmogorov-Smirnov test for a range of threshold quantiles and
    save which entries are significant (p < 0.05) at each quantile.

    Ten quantiles evenly spaced from 0.05 to 0.5 are used. The result is
    written to ``sig_params_<seed>.xlsx`` in `results_path`, one sheet per
    metric, with columns labeled by (quantile, threshold value).

    Parameters
    ----------
    mdl : optional
        Evaluated model. When omitted, a model is created and its table is
        loaded from ``table_<seed>.xlsx`` in `results_path`.
    seed : optional
        Used in the input (when `mdl` is omitted) and output file names.
    kind, analysis
        Forwarded to `create_model` when `mdl` is omitted.
    '''
    if mdl is None:
        mdl = create_model(kind=kind, analysis=analysis)
        table_file = ospath.join(results_path, f'table_{seed}.xlsx')
        mdl.table = load_data(table_file, header=[0,1])

    quantiles = np.linspace(0.05, 0.5, 10)
    significant = []   # one boolean table per quantile
    cutoffs = []       # one list of thresholds (metric order) per quantile
    for q in quantiles:
        thrs = update_thresholds(mdl, [], quantile=q)
        D, p = mdl.kolmogorov_smirnov_d(thresholds=thrs)
        cutoffs.append(thrs)
        significant.append(p < 0.05)

    # One initially empty sheet per metric, filled column by column.
    out = {m: pd.DataFrame() for m in var_indices(mdl.metrics)}
    for flags, q in zip(significant, quantiles):
        for m, sheet in out.items():
            sheet[q] = flags[m]

    # Transpose so that each row holds the thresholds of a single metric.
    cutoffs = np.asarray(cutoffs).T

    out_file = ospath.join(results_path, f'sig_params_{seed}.xlsx')
    with pd.ExcelWriter(out_file) as writer:
        for (m, sheet), thrs in zip(out.items(), cutoffs):
            sheet.index = sheet.index.droplevel()
            sheet.columns = pd.MultiIndex.from_tuples(
                list(zip(sheet.columns, thrs)),
                names=['quantile', 'NRMSE'],
                )
            sheet.to_excel(writer, sheet_name=m[-1])

#%%
if __name__ == '__main__':
    # Seeds used for earlier 'include' runs, by sample size:
    #   N=1000 -> 201, N=2000 -> 502, N=3000 -> 503,
    #   N=5000 -> 505, N=10000 -> 400, N=15000 -> 206
    seed = 210 # include, N=20000

    # Uncertainty run + KS test, then density plots and the threshold sweep.
    mdl = run_UA_SA(seed=seed)
    # No model is passed here, so the table is re-read from ``table_<seed>.xlsx``.
    plot_cdf_by_group(seed=seed)
    KS_test_var_thresholds(mdl=mdl, seed=seed)
Loading

0 comments on commit 742b96e

Please sign in to comment.