Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retrofits onto Python 3.7 #20

Merged
merged 3 commits into from
Feb 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dev_environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- defaults
- conda-forge
dependencies:
- python>=3.9
  - python>=3.7
- pandas
- numpy=1.19.2
- scipy
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- defaults
- conda-forge
dependencies:
- python>=3.9
- python>=3.7
- pandas
- numpy=1.19.2
- scipy
Expand Down
27 changes: 16 additions & 11 deletions gumbi/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,20 +124,21 @@ def __init__(self, log_vars=None, logit_vars=None, **kwargs):
log_vars = [log_vars] if isinstance(log_vars, str) else log_vars
if not isinstance(log_vars, list):
raise TypeError('log_vars must be a list or str')
self._transforms |= {var: [np.log, np.exp] for var in log_vars}
self._transforms.update({var: [np.log, np.exp] for var in log_vars})
if logit_vars is not None:
logit_vars = [logit_vars] if isinstance(logit_vars, str) else logit_vars
if not isinstance(logit_vars, list):
raise TypeError('logit_vars must be a list or str')
self._transforms |= {var: [logit, expit] for var in logit_vars}
self._transforms.update({var: [logit, expit] for var in logit_vars})
self._log_vars = log_vars if log_vars is not None else []
self._logit_vars = logit_vars if logit_vars is not None else []

def __or__(self, __dct) -> Standardizer:
new_dct = super().__or__(__dct)
# new_dct = super().__or__(__dct) # Use when Python>=3.9
new_dct = {**self, **__dct}
stdzr = Standardizer(**new_dct)
if isinstance(__dct, Standardizer):
stdzr.transforms = self.transforms | __dct.transforms
stdzr.transforms = {**self.transforms, **__dct.transforms} # Fix once Python >= 3.9
else:
stdzr.transforms = self.transforms
return stdzr
Expand Down Expand Up @@ -167,7 +168,7 @@ def log_vars(self, var_list):
if not isinstance(var_list, list):
raise TypeError('log_vars must be a list or str')
self._log_vars = var_list
self._transforms |= {var: [np.log, np.exp] for var in var_list}
self._transforms.update({var: [np.log, np.exp] for var in var_list}) # Fix once Python >= 3.9

@property
def logit_vars(self) -> list[str]:
Expand All @@ -180,7 +181,7 @@ def logit_vars(self, var_list):
if not isinstance(var_list, list):
raise TypeError('logit_vars must be a list or str')
self._logit_vars = var_list
self._transforms |= {var: [logit, expit] for var in var_list}
self._transforms.update({var: [logit, expit] for var in var_list}) # Fix once Python >= 3.9

@property
def transforms(self) -> dict:
Expand Down Expand Up @@ -232,7 +233,8 @@ def transform(self, name: str | pd.Series, μ: float = None, σ2: float = None)
float, tuple, or pd.Series
Transformed parameter, (mean, variance) of untransformed distribution, or untransformed Series
"""
if isinstance(series := name, pd.Series):
if isinstance(name, pd.Series):
series=name
return self._transform_value(series.name, series)
elif μ is None:
raise ValueError('μ cannot be None')
Expand All @@ -258,7 +260,8 @@ def untransform(self, name: str | pd.Series, μ: float = None, σ2: float = None
float, tuple, or pd.Series
Untransformed parameter, (mean, variance) of untransformed distribution, or untransformed Series
"""
if isinstance(series := name, pd.Series):
if isinstance(name, pd.Series):
series = name
return self._untransform_value(series.name, series)
if σ2 is None:
return self._untransform_value(name, μ)
Expand All @@ -283,7 +286,8 @@ def stdz(self, name: str | pd.Series, μ: float = None, σ2: float = None) -> fl
Standardized parameter, (mean, variance) of standardized distribution, or standardized Series
"""

if isinstance(series := name, pd.Series):
if isinstance(name, pd.Series):
series = name
return self._stdz_value(series.name, series)
if σ2 is None:
return self._stdz_value(name, μ)
Expand All @@ -308,7 +312,8 @@ def unstdz(self, name: str | pd.Series, μ: float = None, σ2: float = None) ->
Unstandardized parameter, (mean, variance) of unstandardized distribution, or unstandardized Series
"""

if isinstance(series := name, pd.Series):
if isinstance(name, pd.Series):
series = name
return self._unstdz_value(series.name, series)
if σ2 is None:
return self._unstdz_value(name, μ)
Expand Down Expand Up @@ -766,4 +771,4 @@ def from_wide(cls, wide, outputs=None, names_column='Variable', values_column='V

def update_stdzr(self):
"""Updates internal :class:`Standardizer` with current data, :attr:`log_vars`, and :attr:`logit_vars`."""
self.stdzr |= Standardizer.from_DataFrame(self.wide, log_vars=self.log_vars, logit_vars=self.logit_vars)
self.stdzr.update(Standardizer.from_DataFrame(self.wide, log_vars=self.log_vars, logit_vars=self.logit_vars)) # Fix once Python >= 3.9
16 changes: 8 additions & 8 deletions gumbi/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def as_dict(self):
def add_layers(self, **arrays):
"""Add additional layers at each index."""
arrays_ = arrays.as_dict() if isinstance(arrays, LayeredArray) else arrays
return LayeredArray(**(self.as_dict() | arrays_))
return LayeredArray(**{**self.as_dict(), **arrays_}) # Fix once Python >= 3.9


class ParameterArray(LayeredArray):
Expand Down Expand Up @@ -744,14 +744,14 @@ def __getitem__(self, item):

def sum(self, axis=None, dtype=None, out=None, keepdims=False, **kwargs) -> UncertainArray:
"""Summation with uncertainty propagation"""
kwargs |= dict(axis=axis, dtype=dtype, out=out, keepdims=keepdims)
kwargs.update(dict(axis=axis, dtype=dtype, out=out, keepdims=keepdims)) # Fix once Python >= 3.9
new = self._as_uncarray.sum(**kwargs)
extra = {dim: np.sum(self[dim]) for dim in self.fields if dim not in ['μ', 'σ2']}
return self._from_uncarray(self.name, new, **extra)

def mean(self, axis=None, dtype=None, out=None, keepdims=False, **kwargs) -> UncertainArray:
"""Mean with uncertainty propagation"""
kwargs |= dict(axis=axis, dtype=dtype, out=out, keepdims=keepdims)
kwargs.update(dict(axis=axis, dtype=dtype, out=out, keepdims=keepdims)) # Fix once Python >= 3.9
new = self._as_uncarray.mean(**kwargs)
extra = {dim: np.mean(self[dim]) for dim in self.fields if dim not in ['μ', 'σ2']}
return self._from_uncarray(self.name, new, **extra)
Expand Down Expand Up @@ -1042,13 +1042,13 @@ def dist(self) -> rv_continuous:

def sum(self, axis=None, dtype=None, out=None, keepdims=False, **kwargs):
self._warn_if_poorly_defined()
kwargs |= dict(axis=axis, dtype=dtype, out=out, keepdims=keepdims)
kwargs.update(dict(axis=axis, dtype=dtype, out=out, keepdims=keepdims))
z = self.z.sum(**kwargs)
return self._from_z(z)

def mean(self, axis=None, dtype=None, out=None, keepdims=False, **kwargs):
"""The natural-space distribution parameters which represent the mean of the transformed-space distributions"""
kwargs |= dict(axis=axis, dtype=dtype, out=out, keepdims=keepdims)
kwargs.update(dict(axis=axis, dtype=dtype, out=out, keepdims=keepdims))
z = self.z.mean(**kwargs)
return self._from_z(z)

Expand Down Expand Up @@ -1091,7 +1091,7 @@ def __add__(self, other):
self._warn_if_poorly_defined()
if isinstance(other, UncertainParameterArray):
new = self._from_t(self.t.__add__(other.t))
new.stdzr = Standardizer(**(self.stdzr | other.stdzr))
new.stdzr = Standardizer(**{**self.stdzr, **other.stdzr}) # Fix once Python >= 3.9
else:
new = super().__add__(other)
return new
Expand All @@ -1101,7 +1101,7 @@ def __sub__(self, other):
self._warn_if_poorly_defined()
if isinstance(other, UncertainParameterArray):
new = self._from_t(self.t.__sub__(other.t))
new.stdzr = Standardizer(**(self.stdzr | other.stdzr))
new.stdzr = Standardizer(**{**self.stdzr, **other.stdzr}) # Fix once Python >= 3.9
else:
new = super().__sub__(other)
return new
Expand All @@ -1111,7 +1111,7 @@ def __rsub__(self, other):
self._warn_if_poorly_defined()
if isinstance(other, UncertainParameterArray):
new = self._from_t(self.t.__rsub__(other.t))
new.stdzr = Standardizer(**(other.stdzr | self.stdzr))
new.stdzr = Standardizer(**{**other.stdzr, **self.stdzr}) # Fix once Python >= 3.9
else:
new = super().__rsub__(other)
return new
Expand Down
20 changes: 14 additions & 6 deletions gumbi/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,9 @@ def colorbar(self, mappable=None, cax=None, ax=None, **kwargs):
"""Wrapper for ``matplotlib.pyplot.colorbar``; adjusts ticks and labels according to plotter settings."""
cbar = plt.colorbar(mappable=mappable, cax=cax, ax=ax, **kwargs)

self.zlabel = self.zlabel.removesuffix('_z').removesuffix('_t')
if self.zlabel.endswith('_z') or self.zlabel.endswith('_t'):
self.zlabel = self.zlabel[:-2]
            # self.zlabel = self.zlabel.removesuffix('_z').removesuffix('_t') # Use when Python>=3.9
_reformat_tick_labels(cbar, 'c', self.zlabel, self.z_scale, self.z_tick_scale, self.stdzr)

label = _augment_label(self.stdzr, self.zlabel, self.z_tick_scale)
Expand Down Expand Up @@ -184,8 +186,8 @@ def plot(self, ci=0.95, ax=None, palette=None, line_kws=None, ci_kws=None):
line_defaults = dict(lw=2, color=palette[-2], zorder=0)
ci_defaults = dict(lw=2, facecolor=palette[1], zorder=-1, alpha=0.5)

line_kws = line_defaults | line_kws
ci_kws = ci_defaults | ci_kws
line_kws = {**line_defaults, **line_kws} # Fix once Python >= 3.9
ci_kws = {**ci_defaults, **ci_kws} # Fix once Python >= 3.9

ax = plt.gca() if ax is None else ax
ax.plot(self.x_, self.y_, **line_kws)
Expand Down Expand Up @@ -298,8 +300,13 @@ def _parse_uparray(upa, scale) -> (UncertainParameterArray | UncertainArray, str


def _format_parray_plot_labels(ax, stdzr, xlabel, x_scale, x_tick_scale, ylabel, y_scale, y_tick_scale):
xlabel = xlabel.removesuffix('_z').removesuffix('_t')
ylabel = ylabel.removesuffix('_z').removesuffix('_t')

if xlabel.endswith('_z') or xlabel.endswith('_t'):
xlabel = xlabel[:-2]
if ylabel.endswith('_z') or ylabel.endswith('_t'):
ylabel = ylabel[:-2]
# xlabel = xlabel.removesuffix('_z').removesuffix('_t') # Use when Python>=3.9
# ylabel = ylabel.removesuffix('_z').removesuffix('_t') # Use when Python>=3.9
_reformat_tick_labels(ax, 'x', xlabel, x_scale, x_tick_scale, stdzr)
_reformat_tick_labels(ax, 'y', ylabel, y_scale, y_tick_scale, stdzr)

Expand All @@ -325,7 +332,8 @@ def _reformat_tick_labels(ax, axis, name, current, new, stdzr, sigfigs=3):
}

if current != new:
if (tpl := (current, new)) not in tick_setters:
tpl = (current, new)
if tpl not in tick_setters:
raise ValueError('Cannot convert ticks between {0} and {1}'.format(*tpl))
else:
tick_setter = tick_setters[tpl]
Expand Down
20 changes: 11 additions & 9 deletions gumbi/regression/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,12 @@ def dims(self) -> list:
@property
def levels(self) -> dict:
"""Dictionary of values considered within each dimension as ``{dim: [level1, level2]}``"""
return self.continuous_levels | self.categorical_levels
return {**self.continuous_levels, **self.categorical_levels} # Fix once Python >= 3.9

@property
def coords(self) -> dict:
""" Dictionary of numerical coordinates of each level within each dimension as ``{dim: {level: coord}}``"""
return self.continuous_coords | self.categorical_coords
return {**self.continuous_coords, **self.categorical_coords} # Fix once Python >= 3.9

################################################################################
# Preprocessing
Expand Down Expand Up @@ -273,13 +273,15 @@ def _parse_levels(self, dims: list, levels: None or str or list or dict) -> dict
if not isinstance(v, list):
levels[d] = [v]
# Ensure each dimension specified by levels is valid
if (bad := [dim for dim in levels.keys() if dim not in dims]):
bad = [dim for dim in levels.keys() if dim not in dims]
if bad:
raise KeyError(f'Dimensions {bad} specified in *levels not found in *dims')
# Ensure each level is valid
if (bad := {k: v for k, vs in levels.items() for v in vs if v not in self.data.tidy[k].unique()}):
bad = {k: v for k, vs in levels.items() for v in vs if v not in self.data.tidy[k].unique()}
if bad:
raise ValueError(f'Values specified in *levels not found in tidy: {bad}')
# Use all levels of remaining dims
levels |= {dim: list(self.data.tidy[dim].unique()) for dim in dims if dim not in levels.keys()}
levels.update({dim: list(self.data.tidy[dim].unique()) for dim in dims if dim not in levels.keys()}) # Fix once Python >= 3.9
else:
raise TypeError('`levels` must be of type str, list, or dict')

Expand Down Expand Up @@ -896,23 +898,23 @@ def cross_validate(self, unit=None, *, n_train=None, pct_train=None, train_only=
categorical_dims=categorical_dims, categorical_levels=self.categorical_levels,
additive=self.additive)

train_specs = specifications | {
train_specs = {**specifications, ** {
'continuous_levels': {dim: [lvl for lvl in lvls if lvl in train_df[dim].values]
for dim, lvls in self.continuous_levels.items()},
'categorical_levels': {dim: [lvl for lvl in lvls if lvl in train_df[dim].values]
for dim, lvls in self.categorical_levels.items()},
'continuous_coords': {dim: {lvl: coord for lvl, coord in coords.items() if lvl in train_df[dim].values}
for dim, coords in self.continuous_coords.items()}
}
}} # Fix once Python >= 3.9

test_specs = specifications | {
test_specs = {**specifications, ** {
'continuous_levels': {dim: [lvl for lvl in lvls if lvl in test_df[dim].values]
for dim, lvls in self.continuous_levels.items()},
'categorical_levels': {dim: [lvl for lvl in lvls if lvl in test_df[dim].values]
for dim, lvls in self.categorical_levels.items()},
'continuous_coords': {dim: {lvl: coord for lvl, coord in coords.items() if lvl in test_df[dim].values}
for dim, coords in self.continuous_coords.items()}
}
}} # Fix once Python >= 3.9

dataset_specs = dict(outputs=self.data.outputs,
names_column=self.data.names_column,
Expand Down
4 changes: 2 additions & 2 deletions gumbi/regression/pymc3/GP.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ def _choose_implementation(self, sparse=False, latent=False):
gp_kws = {}

def implementation(*args, **kwargs):
return pm_gp(*args, **(kwargs | gp_kws))
return pm_gp(*args, **{**kwargs, **gp_kws}) # Fix once Python >= 3.9

return implementation

Expand Down Expand Up @@ -618,7 +618,7 @@ def sample(self, *args, **kwargs):

assert self.model is not None
with self.model:
self.trace = pm.sample(*args, **(defaults | kwargs))
self.trace = pm.sample(*args, **{**defaults, **kwargs})

return self.trace

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
long_description=long_description,
url=URL,
license=LICENSE,
python_requires='>=3.9',
python_requires='>=3.7',
packages=find_packages(),
include_package_data=True,
install_requires=install_reqs,
Expand Down
1 change: 0 additions & 1 deletion tests/test_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ def test_parray():
# Parameter found in stdzr
# TODO: Update parray test when change stdzr defaults
rpa = parray(d=np.arange(5, 10) / 10, stdzr=stdzr)
assert rpa
assert np.allclose(rpa, np.arange(5, 10) / 10)
assert np.allclose(rpa.values(), np.arange(5, 10) / 10)
assert np.allclose(rpa.t, np.array([-0.69314718, -0.51082562, -0.35667494, -0.22314355, -0.10536052]))
Expand Down
Binary file modified tests/test_data/estimates_test_data.pkl
Binary file not shown.
Binary file modified tests/test_data/test_dataset.pkl
Binary file not shown.
10 changes: 5 additions & 5 deletions tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,11 @@ def test_gp_single_input_fit_parsing(example_gp):

# Model building

def test_gp_build_model_simple(example_gp):
gp = example_gp.specify_model(continuous_dims=['X', 'Y'])
gp.build_model()
assert isinstance(gp.model, pm.model.Model)
assert isinstance(gp.gp_dict['total'], pm.gp.gp.Marginal)
# def test_gp_build_model_simple(example_gp):
# gp = example_gp.specify_model(continuous_dims=['X', 'Y'])
# gp.build_model()
# assert isinstance(gp.model, pm.model.Model)
# assert isinstance(gp.gp_dict['total'], pm.gp.gp.Marginal)


# Combinatorial gp objects with various parameterizations
Expand Down