Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SCT4 benchmark #555

Merged
merged 1 commit into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ env:

agents:
queue: new-central
modules: climacommon/2024_04_05
modules: climacommon/2024_05_27

steps:

Expand Down
273 changes: 273 additions & 0 deletions experiments/SCT4_benchmark/config_linreg_all_LES.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
#= Custom calibration configuration file. =#

using Distributions
using StatsBase
using LinearAlgebra
using Random
using CalibrateEDMF
using CalibrateEDMF.ModelTypes
using CalibrateEDMF.DistributionUtils
using CalibrateEDMF.ReferenceModels
using CalibrateEDMF.ReferenceStats
using CalibrateEDMF.LESUtils
using CalibrateEDMF.TurbulenceConvectionUtils
using CalibrateEDMF.ModelTypes
using CalibrateEDMF.HelperFuncs
import CalibrateEDMF.LESUtils: get_shallow_LES_library
# Import EKP modules
using JLD2
using EnsembleKalmanProcesses
using EnsembleKalmanProcesses.Localizers
using EnsembleKalmanProcesses.ParameterDistributions

using TurbulenceConvection
const TC = TurbulenceConvection

include("../../../tools/DiagnosticsTools.jl")
# Cases defined as structs for quick access to default configs.
# Both are empty tag types: they carry no data and are only used to select a
# `get_reference_config` method by dispatch.
# NOTE(review): the names say "SCT3" although this file appears to belong to the
# SCT4 benchmark experiment — confirm the naming is intentional.
# Selects the training reference configuration.
struct SCT3Train end
# Selects the validation reference configuration.
struct SCT3Val end

# Location of the diagnostics file written by a previous (pre-)calibration run.
# NOTE: `<PATH_TO_PRECALIBRATION_DIAGNOSTICS_FILE>` is a template placeholder, not valid
# Julia; replace it with a string path before this file can be loaded.
restart_ds_path = <PATH_TO_PRECALIBRATION_DIAGNOSTICS_FILE>
# Parameter names and values extracted from that file with the "last_nn_particle_mean"
# method. They are later turned into the prior mean of this calibration.
# (`optimal_parameters` is presumably provided by the included DiagnosticsTools.jl — confirm.)
optimal_u_names, optimal_u = optimal_parameters(restart_ds_path; method = "last_nn_particle_mean")


# ---- Sizes and durations shared by the configuration functions below ----

# Hours simulated by each SCM run; sets `t_max` and the end of the averaging windows.
SCM_RUN_TIME_HR = 72.0
# Hour at which the `t_start` averaging window of the reference configs begins.
T_START_HR = 60.0

# Hours covered by one LES run (six days); used to place the covariance window start.
LES_LENGTH_HR = 24 * 6.0
# Number of LES cases kept for training (the reference directory list is cut to this length).
NUM_LES_CASES = 176

# Length of the `linear_ent_params` vector.
NUM_NN_PARAMS = 12

# Training batch size. Possible batch sizes are [2, 4, 8, 11, 16, 22, 44, 88, 176]
batch = 16

"""
    to_float64_vector(value)

Return `value` as a `Vector{Float64}`.

An `AbstractVector` is converted element-wise (a `Vector{Float64}` is returned as-is,
without copying); any other value is wrapped into a one-element vector.
"""
to_float64_vector(value::AbstractVector) = convert(Vector{Float64}, value)
to_float64_vector(value) = Float64[value]


# Namelist overrides shared by the SCM config and by every reference case.
# Each tuple lists the (nested) namelist keys followed by the value to set as its
# last element; the list is handed to `update_namelist!` below.
namelist_args = [
# Time stepping and output cadence (units presumably seconds — TODO confirm).
("time_stepping", "dt_min", 0.5),
("time_stepping", "dt_max", 5.0),
("stats_io", "frequency", 60.0),
("time_stepping", "t_max", 3600.0 * SCM_RUN_TIME_HR),
# Top-level namelist key (no section prefix), set to the full SCM run length.
("t_interval_from_end_s", 3600.0 * SCM_RUN_TIME_HR),
("thermodynamics", "sgs", "mean"),
("turbulence", "EDMF_PrognosticTKE", "surface_area_bc", "Prognostic"),
# Entrainment/detrainment closure: the standard closure is switched off ("None")
# and the "Linear" data-driven closure is selected.
("turbulence", "EDMF_PrognosticTKE", "entrainment_type", "total_rate"),
("turbulence", "EDMF_PrognosticTKE", "entr_dim_scale", "w_height"),
("turbulence", "EDMF_PrognosticTKE", "detr_dim_scale", "mf_grad_rhoa"),
("turbulence", "EDMF_PrognosticTKE", "turbulent_entrainment_factor", 0.0),
("turbulence", "EDMF_PrognosticTKE", "entrainment", "None"),
("turbulence", "EDMF_PrognosticTKE", "ml_entrainment", "Linear"),
# Minimum-area limiter settings.
("turbulence", "EDMF_PrognosticTKE", "min_area", 1e-10),
("turbulence", "EDMF_PrognosticTKE", "limit_min_area", true),
("turbulence", "EDMF_PrognosticTKE", "area_limiter_scale", 0.0),
# Inputs of the linear closure and their normalization.
# NOTE(review): 5 selected inputs + 1 bias, for entrainment and detrainment each,
# would give the NUM_NN_PARAMS = 12 parameters — confirm against TurbulenceConvection.
("turbulence", "EDMF_PrognosticTKE", "entr_pi_subset", (1, 2, 3, 4, 6)),
("turbulence", "EDMF_PrognosticTKE", "pi_norm_consts", [100.0, 2.0, 1.0, 1.0, 1.0, 1.0]),
("turbulence", "EDMF_PrognosticTKE", "entr_nondim_norm_factor", 1.0),
("turbulence", "EDMF_PrognosticTKE", "detr_nondim_norm_factor", 1.0),
# Placeholder (all-zero) closure parameters; the calibrated values are supplied via the prior.
("turbulence", "EDMF_PrognosticTKE", "linear_ent_params", zeros(NUM_NN_PARAMS)),
("turbulence", "EDMF_PrognosticTKE", "linear_ent_biases", true),
]

# Build the default "LES_driven_SCM" namelist in memory (`write = false`) and apply the
# overrides above in place.
namelist = CalibrateEDMF.ReferenceModels.NameList.default_namelist("LES_driven_SCM"; write = false)
update_namelist!(namelist, namelist_args)
# Convert the flat precalibration parameters into name/value collections, using the
# updated namelist. The result is consumed by `get_prior_config` as the prior mean.
# (Exact grouping semantics of `create_parameter_vectors` are not visible here — confirm.)
optimal_params_u_name, optimal_params_u = CalibrateEDMF.TurbulenceConvectionUtils.create_parameter_vectors(
optimal_u_names,
optimal_u,
do_nothing_param_map(),
namelist,
)


"""
    get_config()

Assemble the complete calibration configuration as a `Dict`, one entry per section.
Each section is produced by the matching `get_*_config` function of this file.
"""
function get_config()
    return Dict{Any, Any}(
        # Flags for saving output data
        "output" => get_output_config(),
        # Regularization of the inverse problem
        "regularization" => get_regularization_config(),
        # Reference used in the inverse problem
        "reference" => get_reference_config(SCT3Train()),
        # Reference used for validation
        "validation" => get_reference_config(SCT3Val()),
        # Parameter priors
        "prior" => get_prior_config(),
        # Kalman process
        "process" => get_process_config(),
        # Static SCM configuration
        "scm" => get_scm_config(),
    )
end

"""
    get_output_config()

Return the output section of the configuration: results are rooted at the current
working directory.
"""
get_output_config() = Dict{Any, Any}("outdir_root" => pwd())

"""
    get_regularization_config()

Return the settings that regularize the observations (mean and covariance) of the
inverse problem.
"""
function get_regularization_config()
    # Fields whose observational variance gets a custom scaling factor.
    scaled_fields = ("ql_mean", "total_flux_s", "total_flux_qt")

    return Dict{Any, Any}(
        "perform_PCA" => true,          # Performs PCA on data
        "variance_loss" => 1.0e-2,      # Variance truncation level in PCA
        "normalize" => true,            # Whether to normalize data by pooled variance
        "tikhonov_mode" => "relative",  # Tikhonov regularization mode
        "tikhonov_noise" => 1.0e-6,     # Tikhonov regularization noise level
        "dim_scaling" => true,          # Dimensional scaling of the loss
        "obs_var_scaling" => Dict(field => 0.1 for field in scaled_fields),
    )
end

"""
    get_process_config()

Return the settings of the ensemble Kalman process: iteration count, ensemble size,
algorithm, scheduler, accelerator and failure handling.
"""
function get_process_config()
    return Dict{Any, Any}(
        "N_iter" => 15,
        # Must be 2p+1 when algorithm is "Unscented"
        "N_ens" => 300,
        # One of "Sampler", "Unscented", "Inversion"
        "algorithm" => "Inversion",
        # Choice of covariance in evaluation of y_{j+1} in EKI. True -> Γy, False -> 0
        "noisy_obs" => false,
        # Artificial time stepper of the EKI.
        "scheduler" => DataMisfitController(on_terminate = "continue"),
        "accelerator" => DefaultAccelerator(),
        # Whether to augment the outputs with the parameters for regularization
        "augmented" => false,
        # Alternative handler: "high_loss"
        "failure_handler" => "sample_succ_gauss",
        # A localizer can be set through a "localizer" entry when the number of parameters
        # exceeds the number of ensemble members, e.g. SEC(0.5, 0.1) (first arg is the
        # strength of localization, second is the minimum correlation retained). See
        # https://github.com/CliMA/EnsembleKalmanProcesses.jl/blob/main/src/Localizers.jl#L63
    )
end

# Build the reference-config entries that are identical for training and validation.
# `ref_dirs` is the list of LES output directories, one per case; every per-case entry
# is repeated `length(ref_dirs)` times.
function _les_reference_config(ref_dirs)
    n_cases = length(ref_dirs)
    config = Dict()

    config["case_name"] = repeat(["LES_driven_SCM"], n_cases)
    # Flag to indicate whether reference data is from a perfect model (i.e. SCM instead of LES)
    config["y_reference_type"] = LES()
    config["Σ_reference_type"] = LES()
    config["y_names"] =
        repeat([["s_mean", "ql_mean", "qt_mean", "total_flux_qt", "total_flux_s", "lwp_mean"]], n_cases)
    config["y_dir"] = ref_dirs
    # Averaging window for the reference mean
    config["t_start"] = repeat([T_START_HR * 3600], n_cases)
    config["t_end"] = repeat([SCM_RUN_TIME_HR * 3600], n_cases)
    # Use full LES timeseries for covariance, but don't compute covariances for the
    # first 24 hours of the LES simulation
    config["Σ_t_start"] = repeat([-1 * (LES_LENGTH_HR - SCM_RUN_TIME_HR - 24.0) * 3600.0], n_cases)
    config["Σ_t_end"] = repeat([SCM_RUN_TIME_HR * 3600], n_cases)
    config["time_shift"] = SCM_RUN_TIME_HR * 3600.0
    config["write_full_stats"] = false
    config["namelist_args"] = repeat([namelist_args], n_cases)
    return config
end

"""
    get_reference_config(::SCT3Train)

Return the reference configuration used for training: the first `NUM_LES_CASES`
"amip" cases of the shallow LES library, processed in batches of `batch`.

Throws a `BoundsError` if the library yields fewer than `NUM_LES_CASES` cases.
"""
function get_reference_config(::SCT3Train)
    # Get shallow cases
    les_library = get_shallow_LES_library()

    ref_dirs = []
    for model in keys(les_library)
        for month in keys(les_library[model])
            les_kwargs = (forcing_model = model, month = parse(Int, month), experiment = "amip")
            for cfsite_number in les_library[model][month]["cfsite_numbers"]
                push!(ref_dirs, get_cfsite_les_dir(cfsite_number; les_kwargs...))
            end
        end
    end
    ref_dirs = ref_dirs[1:NUM_LES_CASES]

    config = _les_reference_config(ref_dirs)
    config["batch_size"] = batch # Possible batch sizes are [2, 4, 8, 11, 16, 22, 44, 88, 176]
    return config
end

"""
    get_reference_config(::SCT3Val)

Return the reference configuration used for validation: five "amip4K" cfsites forced
by HadGEM2-A in July. No batch size is set for validation.
"""
function get_reference_config(::SCT3Val)
    # AMIP4K data: July, NE Pacific
    cfsite_numbers = (17, 18, 20, 22, 23)
    les_kwargs = (forcing_model = "HadGEM2-A", month = 7, experiment = "amip4K")
    ref_dirs = [get_cfsite_les_dir(cfsite_number; les_kwargs...) for cfsite_number in cfsite_numbers]

    return _les_reference_config(ref_dirs)
end

"""
    get_prior_config()

Return the prior section of the configuration: parameter constraints, the prior mean
(taken from the precalibration result `optimal_params_u`) and the prior standard
deviations in unconstrained space.
"""
function get_prior_config()
    config = Dict()

    config["constraints"] = Dict(
        # data-driven entrainment parameters
        "linear_ent_params" => fill(no_constraint(), NUM_NN_PARAMS),

        # diffusion parameters
        "tke_ed_coeff" => [bounded(0.01, 1.0)],
        "tke_diss_coeff" => [bounded(0.01, 1.0)],
        "static_stab_coeff" => [bounded(0.01, 1.0)],
        "tke_surf_scale" => [bounded(1.0, 16.0)],
        "Prandtl_number_0" => [bounded(0.5, 1.5)],

        # momentum exchange parameters
        "pressure_normalmode_adv_coeff" => [bounded(0.0, 100.0)],
        "pressure_normalmode_buoy_coeff1" => [bounded(0.0, 10.0)],
        "pressure_normalmode_drag_coeff" => [bounded(0.0, 50.0)],

        # minimum-area limiter parameters
        "min_area_limiter_scale" => [bounded(1.0, 100.0)],
        "min_area_limiter_power" => [bounded(1000.0, 5000.0)],
    )

    # Prior mean: the precalibrated parameters, each stored as a Vector{Float64}.
    prior_mean_values = [to_float64_vector(v) for v in optimal_params_u]
    config["prior_mean"] = Dict(optimal_params_u_name .=> prior_mean_values)

    # The linear-closure parameters are split into two equal halves (entrainment, then
    # detrainment). Each half gets σ = 5.0, except its last entry, which gets σ = 0.25.
    half_length = Int(NUM_NN_PARAMS / 2)
    entr_sigma = fill(5.0, half_length)
    detr_sigma = fill(5.0, half_length)
    entr_sigma[end] = 0.25
    detr_sigma[end] = 0.25

    unit_sigma = 1.0
    config["unconstrained_σ"] = Dict(
        # data-driven entrainment parameters
        "linear_ent_params" => vcat(entr_sigma, detr_sigma),

        # diffusion parameters
        "tke_ed_coeff" => [0.5],
        "tke_diss_coeff" => [0.5],
        "static_stab_coeff" => [unit_sigma],
        "tke_surf_scale" => [0.5],
        "Prandtl_number_0" => [unit_sigma],

        # momentum exchange parameters
        "pressure_normalmode_adv_coeff" => [unit_sigma],
        "pressure_normalmode_buoy_coeff1" => [unit_sigma],
        "pressure_normalmode_drag_coeff" => [unit_sigma],

        # minimum-area limiter parameters
        "min_area_limiter_scale" => [1.0],
        "min_area_limiter_power" => [1.0],
    )
    return config
end

"""
    get_scm_config()

Return the static SCM section of the configuration, which carries the file-level
`namelist_args` overrides.
"""
get_scm_config() = Dict{Any, Any}("namelist_args" => namelist_args)
Loading
Loading