diff --git a/pyproject.toml b/pyproject.toml
index f5ae3fa7..7b69b058 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "onemod"
-version = "0.2.0"
+version = "0.2.1"
 description = "A pipeline package for estimating scalar quantity by leveraging covariates and correlations across multiple dimensions."
 readme = "README.md"
 requires-python = ">=3.10"
@@ -25,12 +25,12 @@ dependencies = [
     "loguru",
     "modrover @ git+https://github.com/ihmeuw-msca/modrover.git@v0.1.3",
     "regmodsm @ git+https://github.com/ihmeuw-msca/regmodsm.git@v0.1.1",
-    "pandas < 2.0",
+    "pandas",
     "pyarrow",
     "pydantic",
     "scipy",
     "regmod == 0.1.1",
-    "weighted-average == 1.1.1",
+    "weighted-average == 1.1.2",
 ]
 
 [project.optional-dependencies]
diff --git a/src/onemod/actions/models/regmod_smooth_model.py b/src/onemod/actions/models/regmod_smooth_model.py
index e4765a8d..bdcbe5dd 100644
--- a/src/onemod/actions/models/regmod_smooth_model.py
+++ b/src/onemod/actions/models/regmod_smooth_model.py
@@ -1,6 +1,7 @@
 """Run regmod smooth model, currently the main goal of this step is to smooth
 the covariate coefficients across age groups.
 """
+
 from functools import partial
 from typing import Callable
 
@@ -58,8 +59,8 @@ def get_residual_computation_function(
 
 def get_residual_se_function(
     model_type: str,
-    col_obs: str,
     col_pred: str,
+    col_weights: str,
 ) -> Callable:
     """
     Calculate the residual standard error for a given row based on the specified model type.
@@ -67,8 +68,8 @@ def get_residual_se_function(
     Parameters:
         row (pd.Series): The row containing the observation and prediction data.
         model_type (str): Type of the statistical model (e.g., 'binomial', 'poisson', 'tobit').
-        col_obs (str): Column name for the observed values.
         col_pred (str): Column name for the predicted values.
+        col_weights (str): Column name for the weights.
 
     Returns:
         float: The calculated residual standard error value.
@@ -79,12 +80,19 @@ def get_residual_se_function(
 
     callable_map = {
         "binomial": partial(
-            lambda row, obs, pred: 1 / np.sqrt(row[col_pred] * (1 - row[col_pred])),
-            obs=col_obs,
+            lambda row, pred, weights: 1
+            / np.sqrt(row[weights] * row[pred] * (1 - row[pred])),
+            pred=col_pred,
+            weights=col_weights,
+        ),
+        "poisson": partial(
+            lambda row, pred, weights: 1 / np.sqrt(row[weights] * row[pred]),
             pred=col_pred,
+            weights=col_weights,
+        ),
+        "gaussian": partial(
+            lambda row, weights: 1 / np.sqrt(row[weights]), weights=col_weights
         ),
-        "poisson": partial(lambda row, pred: 1 / np.sqrt(row[col_pred]), pred=col_pred),
-        "gaussian": lambda *args, **kwargs: 1.0,
     }
 
     try:
@@ -177,7 +185,9 @@ def regmod_smooth_model(experiment_dir: str) -> None:
     for var_group in var_groups:
         cov = var_group["col"]
         if "uprior" not in var_group:
-            var_group["uprior"] = tuple(map(float, coef_bounds.get(cov, [-np.inf, np.inf])))
+            var_group["uprior"] = tuple(
+                map(float, coef_bounds.get(cov, [-np.inf, np.inf]))
+            )
         if "lam" not in var_group:
             var_group["lam"] = lam
 
@@ -210,8 +220,8 @@ def regmod_smooth_model(experiment_dir: str) -> None:
 
     residual_se_func = get_residual_se_function(
         model_type=regmod_smooth_config.mtype,
-        col_obs=global_config.col_obs,
         col_pred=global_config.col_pred,
+        col_weights=regmod_smooth_config.model.weights,
     )
     df["residual"] = df.apply(
         residual_func,
diff --git a/src/onemod/actions/models/weave_model.py b/src/onemod/actions/models/weave_model.py
index c31346ae..5b4b59c1 100644
--- a/src/onemod/actions/models/weave_model.py
+++ b/src/onemod/actions/models/weave_model.py
@@ -19,6 +19,7 @@
     "exponential": "radius",
     "tricubic": "exponent",
     "depth": "radius",
+    "variance": "radius",
 }
 
 
diff --git a/src/onemod/schema/config.py b/src/onemod/schema/config.py
deleted file mode 100644
index e276e042..00000000
--- a/src/onemod/schema/config.py
+++ /dev/null
@@ -1,127 +0,0 @@
-from typing import Any, Optional
-
-from modrover.globals import model_type_dict
-from pydantic import BaseModel, ConfigDict, FilePath, ValidationError
-from pydantic.functional_validators import field_validator
-
-
-class ParametrizedBaseModel(BaseModel):
-    """An extension of BaseModel that supports __getitem__ and is configured."""
-
-    model_config = ConfigDict(extra="allow", frozen=False, validate_assignment=True)
-
-    def __getitem__(self, item: Any) -> Any:
-        return getattr(self, item)
-
-
-class RoverConfiguration(ParametrizedBaseModel):
-    groupby: list[str] = []
-    # TODO: This clashes with pydantic naming conventions and will raise warnings
-    model_type: str
-    cov_fixed: list[str] = []
-    cov_exploring: list[str] = []
-    weights: str
-    holdouts: list[str] = []
-    fit_args: dict = {}
-
-    parent_args: dict = {}
-
-    @field_validator("model_type")
-    @classmethod
-    def valid_model_type(cls, model_type: str) -> str:
-        assert (
-            model_type in model_type_dict
-        ), f"model_type must be one of {model_type_dict.keys()}"
-        return model_type
-
-    @field_validator("fit_args")
-    @classmethod
-    def valid_fit_args(cls, fit_args: dict) -> dict:
-        # TODO: Necessary or not to import and validate?
-        # Could import Rover.fit and inspect the args
-        return fit_args
-
-
-class RegmodSmoothConfiguration(ParametrizedBaseModel):
-    model_type: str
-    dims: list[dict] = []
-    var_groups: list[dict] = []
-    weights: str
-    fit_args: dict = {}
-    inv_link: str
-    coef_bounds: dict[str, list[float]] = {}
-    lam: float = 0.0
-
-    parent_args: dict = {}
-
-    @field_validator("model_type")
-    @classmethod
-    def valid_model_type(cls, model_type: str) -> str:
-        if model_type not in model_type_dict:
-            raise ValidationError(f"model_type must be one of {model_type_dict.keys()}")
-        return model_type
-
-
-class WeaveConfiguration(ParametrizedBaseModel):
-    # TODO
-    pass
-
-
-class SwimrConfiguration(ParametrizedBaseModel):
-    # TODO
-    pass
-
-
-class EnsembleConfiguration(ParametrizedBaseModel):
-    # TODO
-    pass
-
-
-class ParentConfiguration(ParametrizedBaseModel):
-    input_path: FilePath  # FilePath auto-validates that the path exists and is a file
-    col_id: list[str]
-    col_obs: str
-    col_pred: str
-    col_holdout: list[str]
-    col_test: str
-    col_sigma: str = ""
-    max_attempts: int = 3
-    max_batch: int = -1
-
-    rover_covsel: Optional[RoverConfiguration] = None
-    regmod_smooth: Optional[RegmodSmoothConfiguration] = None
-    weave: Optional[WeaveConfiguration] = None
-    swimr: Optional[SwimrConfiguration] = None
-    ensemble: Optional[EnsembleConfiguration] = None
-
-    def __init__(self, **data):
-        super().__init__(**data)
-        # Pass global attributes to children
-        global_vals = {
-            "input_path": self.input_path,
-            "col_id": self.col_id,
-            "col_obs": self.col_obs,
-            "col_pred": self.col_pred,
-            "col_holdout": self.col_holdout,
-            "col_test": self.col_test,
-            "col_sigma": self.col_sigma,
-            "max_attempts": self.max_attempts,
-            "max_batch": self.max_batch,
-        }
-
-        child_models = [
-            self.rover_covsel,
-            self.regmod_smooth,
-            self.weave,
-            self.swimr,
-            self.ensemble,
-        ]
-
-        for child_model in child_models:
-            if child_model:
-                # Store parent args on the child models, can be accessed if necessary
-                child_model.parent_args = global_vals
-
-    @property
-    def extra_fields(self) -> set[str]:
-        return set(self.__dict__) - set(self.model_fields)
diff --git a/tests/conftest.py b/tests/conftest.py
index b14a3b26..6ef735dd 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -85,6 +85,9 @@ def sample_input_data(temporary_directory):
     # Generate an observations column, random from 0 to 1
     data["obs_rate"] = np.random.rand(len(data))
 
+    # Add population for residual uncertainty computation
+    data["population"] = 1.0
+
     # Save to the temp directory
     os.mkdir(temporary_directory / "data")
     data_path = temporary_directory / "data" / "data.parquet"
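Note (not part of the patch): the regmod_smooth_model.py hunks replace the unweighted residual standard errors (and the constant 1.0 previously used for the Gaussian case) with weighted ones: 1/sqrt(w * p * (1 - p)) for binomial, 1/sqrt(w * p) for Poisson, and 1/sqrt(w) for Gaussian, where w comes from the weights column (population in the test fixture). The standalone sketch below reproduces the new callable_map outside the pipeline; the "pred" and "weights" column names and the sample values are illustrative only.

# Standalone sketch of the weighted residual standard errors added in
# regmod_smooth_model.py; the "pred" and "weights" columns are illustrative.
from functools import partial

import numpy as np
import pandas as pd

col_pred, col_weights = "pred", "weights"

callable_map = {
    "binomial": partial(
        lambda row, pred, weights: 1
        / np.sqrt(row[weights] * row[pred] * (1 - row[pred])),
        pred=col_pred,
        weights=col_weights,
    ),
    "poisson": partial(
        lambda row, pred, weights: 1 / np.sqrt(row[weights] * row[pred]),
        pred=col_pred,
        weights=col_weights,
    ),
    "gaussian": partial(
        lambda row, weights: 1 / np.sqrt(row[weights]), weights=col_weights
    ),
}

df = pd.DataFrame({"pred": [0.1, 0.5], "weights": [100.0, 400.0]})
for mtype, se_func in callable_map.items():
    # e.g. binomial row 0: 1 / sqrt(100 * 0.1 * 0.9) = 0.333...
    print(mtype, df.apply(se_func, axis=1).round(4).tolist())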
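Note (not part of the patch): the reformatted uprior block in regmod_smooth_model() keeps the existing behavior, i.e. covariates without an explicit uprior fall back to the bounds in coef_bounds, or to (-inf, inf) when no bound is configured. A minimal sketch with hypothetical covariate names and bounds:

# Sketch of the uprior defaulting reformatted in regmod_smooth_model();
# the coef_bounds dict and covariate names below are hypothetical.
import numpy as np

coef_bounds = {"ldi": [-1.0, 0.0]}
var_groups = [{"col": "ldi"}, {"col": "haq", "uprior": (0.0, 1.0)}]

for var_group in var_groups:
    cov = var_group["col"]
    if "uprior" not in var_group:
        var_group["uprior"] = tuple(
            map(float, coef_bounds.get(cov, [-np.inf, np.inf]))
        )

print(var_groups)
# [{'col': 'ldi', 'uprior': (-1.0, 0.0)}, {'col': 'haq', 'uprior': (0.0, 1.0)}]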