diff --git a/preliz/internal/optimization.py b/preliz/internal/optimization.py
index 8398eef6..436b63fa 100644
--- a/preliz/internal/optimization.py
+++ b/preliz/internal/optimization.py
@@ -259,44 +259,35 @@ def optimize_pymc_model(
     fmodel,
     target,
     num_draws,
-    bounds,
+    opt_iterations,
     initial_guess,
-    prior,
-    preliz_model,
-    transformed_var_info,
     rng,
 ):
-    for idx in range(401):
+    prior_array = np.zeros((opt_iterations, len(initial_guess)))
+
+    for idx in range(opt_iterations + 1):
         # can we sample systematically from these and less random?
         # This should be more flexible and allow other targets than just
         # a PreliZ distribution
         if isinstance(target, list):
             obs = get_weighted_rvs(target, num_draws, rng)
         else:
-            obs = target.rvs(num_draws, random_state=rng)
+            obs = target.rvs(num_draws, random_state=idx)
         result = minimize(
             fmodel,
             initial_guess,
             tol=0.001,
-            method="SLSQP",
-            args=(obs, transformed_var_info, preliz_model),
-            bounds=bounds,
+            method="powell",
+            args=(obs,),
         )
-        optimal_params = result.x
-        # To help minimize the effect of priors
-        # We don't save the first result and insteas we use it as the initial guess
-        # for the next optimization
-        # Updating the initial guess also helps to provides more spread samples
-        initial_guess = optimal_params
+        # To help minimize the effect of the initial priors we don't save the first
+        # result; instead we use it as the initial guess for the next optimization.
+        # Carrying the initial guess forward also reduces computation time
         if idx:
-            for key, param in zip(prior.keys(), optimal_params):
-                prior[key].append(param)
-
-    # convert to numpy arrays
-    for key, value in prior.items():
-        prior[key] = np.array(value)
+            prior_array[idx - 1] = result.x
+        initial_guess = result.x
 
-    return prior
+    return prior_array
 
 
 def relative_error(dist, lower, upper, required_mass):
diff --git a/preliz/ppls/agnostic.py b/preliz/ppls/agnostic.py
index 7b439b6e..b840baf7 100644
--- a/preliz/ppls/agnostic.py
+++ b/preliz/ppls/agnostic.py
@@ -17,7 +17,11 @@ from preliz.internal.plot_helper import plot_repr
 from preliz.distributions import Gamma, Normal, HalfNormal
 from preliz.unidimensional.mle import mle
 
-from preliz.ppls.pymc_io import get_model_information, write_pymc_string
+from preliz.ppls.pymc_io import (
+    extract_preliz_distributions,
+    retrieve_variable_info,
+    write_pymc_string,
+)
 from preliz.ppls.bambi_io import (
     get_pymc_model,
     write_bambi_string,
@@ -65,14 +69,15 @@ def posterior_to_prior(model, idata, new_families=None, engine="auto"):
     if engine == "bambi":
         model = get_pymc_model(model)
 
-    _, _, preliz_model, _, untransformed_var_info, *_ = get_model_information(model)
+    preliz_model = extract_preliz_distributions(model)
+    var_info, _ = retrieve_variable_info(model)
 
     new_priors = back_fitting_idata(idata, preliz_model, new_families)
 
     if engine == "bambi":
-        new_model = write_bambi_string(new_priors, untransformed_var_info)
+        new_model = write_bambi_string(new_priors, var_info)
     elif engine == "pymc":
-        new_model = write_pymc_string(new_priors, untransformed_var_info)
+        new_model = write_pymc_string(new_priors, var_info)
 
     return new_model
diff --git a/preliz/ppls/bambi_io.py b/preliz/ppls/bambi_io.py
index e31fcf5a..74f0fabe 100644
--- a/preliz/ppls/bambi_io.py
+++ b/preliz/ppls/bambi_io.py
@@ -2,9 +2,12 @@
 
 import importlib
 import inspect
+from copy import copy
 import re
 from sys import modules
 
+import numpy as np
+
 
 def get_pymc_model(model):
     if not model.built:
@@ -19,17 +22,36 @@ def write_bambi_string(new_priors, var_info):
     So the user can copy and paste, ideally with none to
    minimal changes.
     """
     header = "{\n"
+    variables = []
+    names = list(new_priors.keys())
     for key, value in new_priors.items():
-        dist_name, dist_params = repr(value).split("(")
-        dist_params = dist_params.rstrip(")")
-        size = var_info[key][1]
-        if size > 1:
-            header += f'"{key}" : bmb.Prior("{dist_name}", {dist_params}, shape={size}),\n'
+        idxs = var_info[key][-1]
+        if idxs:
+            for i in idxs:
+                nkey = names[i]
+                cp_dist = copy(new_priors[nkey])
+                cp_dist._fit_moments(np.mean(value.mean()), np.mean(value.std()))
+
+                dist_name, dist_params = repr(cp_dist).split("(")
+                dist_params = dist_params.rstrip(")")
+                size = var_info[nkey][1]
+                if size > 1:
+                    variables[
+                        i
+                    ] = f'"{nkey}" : bmb.Prior("{dist_name}", {dist_params}, shape={size}),\n'
+                else:
+                    variables[i] = f'"{nkey}" : bmb.Prior("{dist_name}", {dist_params}),\n'
         else:
-            header += f'"{key}" : bmb.Prior("{dist_name}", {dist_params}),\n'
-
-    header = header.rstrip(", ") + "}"
-    return header
+            dist_name, dist_params = repr(value).split("(")
+            dist_params = dist_params.rstrip(")")
+            size = var_info[key][1]
+            if size > 1:
+                variables.append(
+                    f'"{key}" : bmb.Prior("{dist_name}", {dist_params}, shape={size}),\n'
+                )
+            else:
+                variables.append(f'"{key}" : bmb.Prior("{dist_name}", {dist_params}),\n')
+
+    return "".join([header] + variables) + "}"
 
 
 def from_bambi(fmodel, draws):
diff --git a/preliz/ppls/pymc_io.py b/preliz/ppls/pymc_io.py
index 47a0cbdf..2250bd88 100644
--- a/preliz/ppls/pymc_io.py
+++ b/preliz/ppls/pymc_io.py
@@ -7,9 +7,10 @@ import numpy as np
 
 try:
-    from pytensor.tensor import vector, TensorConstant
+    from pytensor.tensor import matrix, TensorConstant
+    from pytensor import function
     from pytensor.graph.basic import ancestors
-    from pymc import logp, compile_pymc
+    from pymc.pytensorf import compile_pymc, join_nonshared_inputs
     from pymc.util import is_transformed_name, get_untransformed_name
 except ModuleNotFoundError:
     pass
@@ -17,7 +18,7 @@ from preliz.internal.distribution_helper import get_distributions
 
 
-def back_fitting_pymc(prior, preliz_model, untransformed_var_info):
+def back_fitting_pymc(prior, preliz_model, var_info):
     """
     Fit the samples from prior into user provided model's prior.
     from the perspective of ppe "prior" is actually an approximated posterior
@@ -26,85 +27,61 @@
     We need probability distributions.
     """
     new_priors = {}
-    for key, size_inf in untransformed_var_info.items():
-        if not size_inf[2]:
-            size = size_inf[1]
-            if size > 1:
-                params = []
-                for i in range(size):
-                    value = prior[f"{key}__{i}"]
-                    dist = preliz_model[key]
-                    dist._fit_mle(value)
-                    params.append(dist.params)
-                dist._parametrization(*[np.array(x) for x in zip(*params)])
-            else:
-                value = prior[key]
-                dist = preliz_model[key]
-                dist._fit_mle(value)
+    for rv_name, (_, size, *_) in var_info.items():
+        if size > 1:
+            params = []
+            for i in range(size):
+                opt_values = prior[rv_name][:, i]
+                dist = preliz_model[rv_name]
+                dist._fit_mle(opt_values)
+                params.append(dist.params)
+            dist._parametrization(*[np.array(x) for x in zip(*params)])
+        else:
+            opt_values = prior[rv_name]
+            dist = preliz_model[rv_name]
+            dist._fit_mle(opt_values)
 
-            new_priors[key] = dist
+        new_priors[rv_name] = dist
     return new_priors
 
 
-def compile_logp(model):
+def compile_mllk(model):
     """
-    Compile the log-likelihood function for the model.
-    We need to be able to condition it on parameters or data.
-    Because during the optimization routine we need to change both.
-    Currently this will fail for a prior that depends on other prior.
+    Compile the log-likelihood function for the model to be able to condition on both
+    data and parameters.
     """
-    value = vector("value")
-    rv_logp = logp(*model.observed_RVs, value)
-    rv_logp_fn = compile_pymc([*model.free_RVs, value], rv_logp, on_unused_input="ignore")
-    rv_logp_fn.trust_input = True
+    obs_rvs = model.observed_RVs[0]
+    old_y_value = model.rvs_to_values[obs_rvs]
+    new_y_value = obs_rvs.type()
+    model.rvs_to_values[obs_rvs] = new_y_value
 
-    def fmodel(params, obs, var_info, p_model):
-        params = reshape_params(model, var_info, p_model, params)
-        return -rv_logp_fn(*params, obs).sum()
+    vars_ = model.value_vars
+    initial_point = model.initial_point()
 
-    return fmodel
+    [logp], raveled_inp = join_nonshared_inputs(
+        point=initial_point, outputs=[model.datalogp], inputs=vars_
+    )
 
+    rv_logp_fn = compile_pymc([raveled_inp, new_y_value], logp)
+    rv_logp_fn.trust_input = True
 
-def get_pymc_to_preliz():
-    """
-    Generate dictionary mapping pymc to preliz distributions
-    """
-    all_distributions = [
-        dist
-        for dist in modules["preliz.distributions"].__all__
-        if dist not in ["Truncated", "Censored", "Hurdle", "Mixture"]
-    ]
-    pymc_to_preliz = dict(
-        zip([dist.lower() for dist in all_distributions], get_distributions(all_distributions))
-    )
-    return pymc_to_preliz
+    def fmodel(params, obs):
+        return -rv_logp_fn(params, obs).sum()
+
+    return fmodel, old_y_value, obs_rvs
 
 
-def get_initial_guess(model, free_rvs):
+def get_initial_guess(model):
     """
     Get initial guess for optimization routine.
     """
-    init = []
-
-    free_rvs_names = [rv.name for rv in free_rvs]
-    for key, value in model.initial_point().items():
-
-        if is_transformed_name(key):
-            name = get_untransformed_name(key)
-            value = model.rvs_to_transforms[model.named_vars[name]].backward(value).eval()
-        else:
-            name = key
-
-        if name in free_rvs_names:
-            init.append(value)
+    return np.concatenate([np.ravel(value) for value in model.initial_point().values()])
 
-    return np.concatenate([np.atleast_1d(arr) for arr in init]).flatten()
-
 
-def get_model_information(model):  # pylint: disable=too-many-locals
+def extract_preliz_distributions(model):
     """
-    Get information from a PyMC model.
+    Extract the corresponding PreliZ distributions from a PyMC model
 
     Parameters
     ----------
     model : a PyMC model
 
     Returns
     -------
-    bounds : a list of tuples with the support of each marginal distribution in the model
-    prior : a dictionary with a key for each marginal distribution in the model and an empty
-        list as value. This will be filled with the samples from a backfitting procedure.
-    preliz_model : a dictionary with a key for each marginal distribution in the model and the
-        corresponding PreliZ distribution as value
-    transformed_var_info : a dictionary with a key for each transformed variable in the model
-        and a tuple with the shape, size and the indexes of the non-constant parents as value
-    untransformed_var_info : same as `transformed_var_info` but the keys are untransformed
-        variable names
-    num_draws : the number of observed samples
-    free_rvs : a list with the free random variables in the model
+    preliz_model : a dictionary with RV names as keys and the corresponding
+        PreliZ distributions as values
     """
+    all_distributions = [
+        dist
+        for dist in modules["preliz.distributions"].__all__
+        if dist not in ["Truncated", "Censored", "Hurdle", "Mixture"]
+    ]
+    pymc_to_preliz = dict(
+        zip([dist.lower() for dist in all_distributions], get_distributions(all_distributions)),
+    )
 
-    bounds = []
-    prior = {}
     preliz_model = {}
-    transformed_var_info = {}
-    untransformed_var_info = {}
-    free_rvs = []
-    pymc_to_preliz = get_pymc_to_preliz()
-    rvs_to_values = model.rvs_to_values
-
     for r_v in model.free_RVs:
-        r_v_eval = r_v.eval()
-        size = r_v_eval.size
-        shape = r_v_eval.shape
-        nc_parents = non_constant_parents(r_v, model.free_RVs)
-        name = (
+        dist_name = (
             r_v.owner.op.name if r_v.owner.op.name else str(r_v.owner.op).split("RV", 1)[0].lower()
         )
-        dist = copy(pymc_to_preliz[name])
+        dist = copy(pymc_to_preliz[dist_name])
         preliz_model[r_v.name] = dist
+
+    return preliz_model
+
+
+def retrieve_variable_info(model):
+    """
+    Get the shape, size, transformation and parents of each free random variable in a PyMC model.
+    """
+
+    var_info = {}
+    initial_point = model.initial_point()
+    for v_var in model.value_vars:
+        name = v_var.name
+        rvs = model.values_to_rvs[v_var]
+        nc_parents = non_constant_parents(rvs, model)
+        idx_parents = []
         if nc_parents:
-            idxs = [free_rvs.index(var_) for var_ in nc_parents]
-            # the keys are the name of the (transformed) variable
-            transformed_var_info[rvs_to_values[r_v].name] = (shape, size, idxs)
-            # the keys are the name of the (untransformed) variable
-            untransformed_var_info[r_v.name] = (shape, size, idxs)
-        else:
-            free_rvs.append(r_v)
+            idx_parents = [model.free_RVs.index(var_) for var_ in nc_parents]
 
-            if size > 1:
-                for i in range(size):
-                    bounds.append(dist.support)
-                    prior[f"{r_v.name}__{i}"] = []
-            else:
-                bounds.append(dist.support)
-                prior[r_v.name] = []
+        if is_transformed_name(name):
+            name = get_untransformed_name(name)
+            x_var = matrix(f"{name}_transformed")
+            z_var = model.rvs_to_transforms[rvs].backward(x_var)
+            transformation = function(inputs=[x_var], outputs=z_var)
+        else:
+            transformation = None
 
-            # the keys are the name of the (transformed) variable
-            transformed_var_info[rvs_to_values[r_v].name] = (shape, size, nc_parents)
-            # the keys are the name of the (untransformed) variable
-            untransformed_var_info[r_v.name] = (shape, size, nc_parents)
+        var_info[name] = (
+            initial_point[v_var.name].shape,
+            initial_point[v_var.name].size,
+            transformation,
+            idx_parents,
+        )
 
     num_draws = model.observed_RVs[0].eval().size
 
-    return (
-        bounds,
-        prior,
-        preliz_model,
-        transformed_var_info,
-        untransformed_var_info,
-        num_draws,
-        free_rvs,
-    )
+    return var_info, num_draws
+
+
+def unravel_projection(prior_array, var_info, iterations):
+    """Reshape the flat array of optimized draws into per-variable arrays,
+    mapping transformed values back to the constrained space."""
+    size = 0
+    prior_dict = {}
+    for key, values in var_info.items():
+        shape, new_size, transformation, _ = values
+        vector = prior_array[:, size : size + new_size]
+        if transformation is not None:
+            vector = transformation(vector)
+        prior_dict[key] = vector.reshape(iterations, *shape).squeeze()
+        size += new_size
+
+    return prior_dict
 
 
 def write_pymc_string(new_priors, var_info):
@@ -186,49 +168,46 @@ def write_pymc_string(new_priors, var_info):
     """
     header = "with pm.Model() as model:\n"
-
+    variables = []
+    names = list(new_priors.keys())
     for key, value in new_priors.items():
-        dist_name, dist_params = repr(value).split("(")
-        size = var_info[key][1]
-        if size > 1:
-            dist_params = dist_params.split(")")[0]
-            header += f'{key:>4} = pm.{dist_name}("{key}", {dist_params}, shape={size})\n'
-        else:
-            header += f'{key:>4} = pm.{dist_name}("{key}", {dist_params}\n'
-
-    return header
-
-
-def reshape_params(model, var_info, p_model, params):
-    """
-    We flatten the parameters to be able to use them in the optimization routine.
-    """
-    size = 0
-    value = []
-    for var in model.value_vars:
-        shape, new_size, idxs = var_info[var.name]
+        idxs = var_info[key][-1]
         if idxs:
-            dist = p_model[var.name]
-            dist._parametrization(*params[idxs])
-            if new_size > 1:
-                value.append(np.repeat(dist.mean(), new_size))
-            else:
-                value.append(dist.mean())
-            size += new_size
+            for i in idxs:
+                nkey = names[i]
+                cp_dist = copy(new_priors[nkey])
+                cp_dist._fit_moments(np.mean(value.mean()), np.mean(value.std()))
+
+                dist_name, dist_params = repr(cp_dist).split("(")
+                size = var_info[nkey][1]
+                if size > 1:
+                    dist_params = dist_params.split(")")[0]
+                    variables[
+                        i
+                    ] = f'    {nkey} = pm.{dist_name}("{nkey}", {dist_params}, shape={size})\n'
+                else:
+                    variables[i] = f'    {nkey} = pm.{dist_name}("{nkey}", {dist_params}\n'
         else:
-            var_samples = params[size : size + new_size]
-            value.append(var_samples.reshape(shape))
-            size += new_size
+            dist_name, dist_params = repr(value).split("(")
+            size = var_info[key][1]
+            if size > 1:
+                dist_params = dist_params.split(")")[0]
+                variables.append(
+                    f'    {key} = pm.{dist_name}("{key}", {dist_params}, shape={size})\n'
+                )
+            else:
+                variables.append(f'    {key} = pm.{dist_name}("{key}", {dist_params}\n')
 
-    return value
+    return "".join([header] + variables)
 
 
-def non_constant_parents(var_, free_rvs):
+def non_constant_parents(rvs, model):
     """Find the parents of a variable that are not constant."""
     parents = []
-    for variable in var_.get_parents()[0].inputs[2:]:
+    for variable in rvs.get_parents()[0].inputs[2:]:
         if not isinstance(variable, TensorConstant):
-            for free_rv in free_rvs:
+            for free_rv in model.free_RVs:
                 if free_rv in list(ancestors([variable])) and free_rv not in parents:
                     parents.append(free_rv)
     return parents
diff --git a/preliz/predictive/ppe.py b/preliz/predictive/ppe.py
index 4828893f..cb9ffe11 100644
--- a/preliz/predictive/ppe.py
+++ b/preliz/predictive/ppe.py
@@ -7,9 +7,11 @@
 from preliz.ppls.bambi_io import get_pymc_model, write_bambi_string
 from preliz.ppls.agnostic import back_fitting_idata, get_engine
 from preliz.ppls.pymc_io import (
-    get_model_information,
+    extract_preliz_distributions,
+    retrieve_variable_info,
+    unravel_projection,
     get_initial_guess,
-    compile_logp,
+    compile_mllk,
     back_fitting_pymc,
     write_pymc_string,
 )
@@ -58,6 +60,7 @@
         """This method is experimental and under development with no guarantees of correctness.
        Use with caution and triple-check the results."""
     )
+    opt_iterations = 400
     rng = np.random.default_rng(random_state)
 
     engine = get_engine(model) if engine == "auto" else engine
@@ -65,53 +68,48 @@
     # Get models information
     if engine == "bambi":
         model = get_pymc_model(model)
-    (
-        bounds,
-        prior,
-        preliz_model,
-        transformed_var_info,
-        untransformed_var_info,
-        num_draws,
-        free_rvs,
-    ) = get_model_information(model)
+
+    preliz_model = extract_preliz_distributions(model)
+    var_info, num_draws = retrieve_variable_info(model)
 
     # With the projective method we attempt to find a prior that induces
     # a prior predictive distribution as close as possible to the target distribution
     if method == "projective":
         # Initial point for optimization
-        initial_guess = get_initial_guess(model, free_rvs)
+        initial_guess = get_initial_guess(model)
         # compile PyMC model
-        fmodel = compile_logp(model)
-        projection = optimize_pymc_model(
+        fmodel, old_y_value, obs_rvs = compile_mllk(model)
+        projection_raveled = optimize_pymc_model(
             fmodel,
             target,
             num_draws,
-            bounds,
+            opt_iterations,
             initial_guess,
-            prior,
-            preliz_model,
-            transformed_var_info,
             rng,
         )
+        # restore obs_rvs value in the model
+        model.rvs_to_values[obs_rvs] = old_y_value
+
+        projection_unraveled = unravel_projection(projection_raveled, var_info, opt_iterations)
+
         # Backfit `projected_posterior` into the model's prior-families
-        new_priors = back_fitting_pymc(projection, preliz_model, untransformed_var_info)
+        projection_backfitted = back_fitting_pymc(projection_unraveled, preliz_model, var_info)
+
         if engine == "bambi":
-            return write_bambi_string(new_priors, untransformed_var_info)
-        if engine == "pymc":
-            return write_pymc_string(new_priors, untransformed_var_info)
+            new_priors = write_bambi_string(projection_backfitted, var_info)
+        elif engine == "pymc":
+            new_priors = write_pymc_string(projection_backfitted, var_info)
 
-    # Fit the samples to the original prior distribution
-    # or to a set of predefined distributions
     elif method == "pathfinder":
         from pymc_experimental import fit  # pylint:disable=import-outside-toplevel
 
         with model:
-            idata = fit(method="pathfinder", num_samples=1000)
+            idata = fit(method="pathfinder", num_samples=opt_iterations)
 
-        new_priors = back_fitting_idata(idata, preliz_model, new_families=False)
-        if engine == "bambi":
-            new_model = write_bambi_string(new_priors, untransformed_var_info)
-        elif engine == "pymc":
-            new_model = write_pymc_string(new_priors, untransformed_var_info)
+        projection_backfitted = back_fitting_idata(idata, preliz_model, new_families=False)
+        if engine == "bambi":
+            new_priors = write_bambi_string(projection_backfitted, var_info)
+        elif engine == "pymc":
+            new_priors = write_pymc_string(projection_backfitted, var_info)
 
-    return new_model
+    return new_priors
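
A minimal sketch of how the refactored projective flow is exercised end to end. The toy model, the observed data, and the target values are illustrative only (not part of this patch), and it assumes the usual `import preliz as pz` alias with `ppe` exposed at the top level:

import numpy as np
import pymc as pm
import preliz as pz

# Toy model whose priors we want to refine; "y" is the observed RV whose
# prior predictive distribution should match the target.
with pm.Model() as model:
    a = pm.Normal("a", mu=0, sigma=10)
    b = pm.HalfNormal("b", sigma=10)
    pm.Normal("y", mu=a, sigma=b, observed=np.zeros(500))

# Target prior predictive distribution for the observed variable.
target = pz.Normal(mu=174, sigma=20)

# Runs optimize_pymc_model for opt_iterations rounds, unravels and backfits
# the optimized draws, and returns the new model as a string built by
# write_pymc_string, ready to copy and paste.
new_model_string = pz.ppe(model, target)
print(new_model_string)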