Skip to content

Commit

Permalink
Simplify simulation and prepare for #310. (#312)
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiasraabe authored Jan 15, 2020
1 parent 3de71f1 commit 84863f0
Show file tree
Hide file tree
Showing 9 changed files with 278 additions and 293 deletions.
6 changes: 3 additions & 3 deletions .azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:

- bash: |
source activate respy
tox
tox -- -m "not slow"
displayName: Run all tests.
- job:
Expand All @@ -54,7 +54,7 @@ jobs:

- script: |
call activate respy
tox -e pytest
tox -e pytest -- -m "not slow"
displayName: Run pytest.
- job:
Expand Down Expand Up @@ -94,5 +94,5 @@ jobs:

- bash: |
source activate respy
tox -e pytest
tox -e pytest -- -m "not slow"
displayName: Run pytest.
22 changes: 11 additions & 11 deletions docs/getting_started/tutorial-simulation.ipynb

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions respy/likelihood.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from respy.shared import create_base_draws
from respy.shared import downcast_to_smallest_dtype
from respy.shared import generate_column_dtype_dict_for_estimation
from respy.shared import rename_labels
from respy.shared import rename_labels_to_internal
from respy.solve import solve_with_backward_induction
from respy.state_space import StateSpace

Expand Down Expand Up @@ -499,8 +499,11 @@ def _process_estimation_data(df, state_space, optim_paras, options):
"""
col_dtype = generate_column_dtype_dict_for_estimation(optim_paras)

df = df.sort_index()[list(col_dtype)[2:]]
df = df.rename(columns=rename_labels).rename_axis(index=rename_labels)
df = (
df.sort_index()[list(col_dtype)[2:]]
.rename(columns=rename_labels_to_internal)
.rename_axis(index=rename_labels_to_internal)
)
df = convert_labeled_variables_to_codes(df, optim_paras)

# Get indices of states in the state space corresponding to all observations for all
Expand Down
99 changes: 56 additions & 43 deletions respy/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import from respy itself. This is to prevent circular imports.
"""
import copy

import chaospy as cp
import numba as nb
import numpy as np
Expand Down Expand Up @@ -107,8 +109,8 @@ def create_base_draws(shape, seed, monte_carlo_sequence):
University Press.*
"""
n_choices = shape[2]
n_points = shape[0] * shape[1]
n_choices = shape[-1]
n_points = np.prod(shape[:-1])

np.random.seed(seed)

Expand Down Expand Up @@ -151,8 +153,8 @@ def transform_base_draws_with_cholesky_factor(draws, shocks_cholesky, n_wages):
"""
draws_transformed = draws.dot(shocks_cholesky.T)
draws_transformed[:, :, :n_wages] = np.exp(
np.clip(draws_transformed[:, :, :n_wages], MIN_LOG_FLOAT, MAX_LOG_FLOAT)
draws_transformed[..., :n_wages] = np.exp(
np.clip(draws_transformed[..., :n_wages], MIN_LOG_FLOAT, MAX_LOG_FLOAT)
)

return draws_transformed
Expand All @@ -179,25 +181,6 @@ def generate_column_dtype_dict_for_estimation(optim_paras):
return column_dtype_dict


def generate_column_dtype_dict_for_simulation(optim_paras):
    """Generate column labels for simulation output."""
    # The simulation output is a superset of the estimation columns.
    column_dtypes = dict(generate_column_dtype_dict_for_estimation(optim_paras))

    extra_labels = []
    # A "Type" column only exists when the model has more than one type.
    if optim_paras["n_types"] >= 2:
        extra_labels.append("Type")

    choices = optim_paras["choices"]
    for prefix, subset in [
        ("Nonpecuniary_Reward", choices),
        ("Wage", optim_paras["choices_w_wage"]),
        ("Flow_Utility", choices),
        ("Value_Function", choices),
        ("Shock_Reward", choices),
    ]:
        extra_labels.extend(f"{prefix}_{choice.title()}" for choice in subset)
    extra_labels.append("Discount_Rate")

    # Simulation columns override estimation dtypes on overlap; "Type" is the
    # only integer column among the additions.
    for label in extra_labels:
        column_dtypes[label] = int if label == "Type" else float

    return column_dtypes


@nb.njit
def clip(x, minimum=None, maximum=None):
"""Clip input array at minimum and maximum."""
Expand All @@ -214,37 +197,48 @@ def clip(x, minimum=None, maximum=None):
return out


def downcast_to_smallest_dtype(series):
def downcast_to_smallest_dtype(series, downcast_options=None):
"""Downcast the dtype of a :class:`pandas.Series` to the lowest possible dtype.
By default, variables are converted to signed or unsigned integers. Use ``"float"``
to cast variables from ``float64`` to ``float32``.
Be aware that NumPy integers silently overflow which is why conversion to low dtypes
should be done after calculations. For example, using :class:`np.uint8` for an array
and squaring the elements leads to silent overflows for numbers higher than 255.
should be done after calculations. For example, using :class:`numpy.uint8` for an
array and squaring the elements leads to silent overflows for numbers higher than
255.
For more information on the boundaries the NumPy documentation under
For more information on the dtype boundaries see the NumPy documentation under
https://docs.scipy.org/doc/numpy-1.17.0/user/basics.types.html.
"""
# We can skip integer as "unsigned" and "signed" will find the same dtypes.
_downcast_options = ["unsigned", "signed", "float"]
if downcast_options is None:
downcast_options = ["unsigned", "signed"]

if series.dtype.name == "category":
out = series

# Convert bools to integers because they turn the dot product in
# `create_choice_rewards` to the object dtype.
elif series.dtype == np.bool:
out = series.astype(np.dtype("uint8"))

else:
min_dtype = np.dtype("float64")
min_dtype = series.dtype

for dc_opt in _downcast_options:
dtype = pd.to_numeric(series, downcast=dc_opt).dtype
for dc_opt in downcast_options:
try:
dtype = pd.to_numeric(series, downcast=dc_opt).dtype
# A ValueError happens if strings are found in the series.
except ValueError:
min_dtype = "category"
break

if dtype.itemsize == 1 and dtype.name.startswith("u"):
# If we can convert the series to an unsigned integer, we can stop.
if dtype.name.startswith("u"):
min_dtype = dtype
break
elif dtype.itemsize == min_dtype.itemsize and dtype.name.startswith("u"):
min_dtype = dtype
elif dtype.itemsize < min_dtype.itemsize:
min_dtype = dtype
else:
Expand Down Expand Up @@ -282,15 +276,29 @@ def create_base_covariates(states, covariates_spec, raise_errors=True):
"""
covariates = states.copy()

for covariate, definition in covariates_spec.items():
if covariate not in states.columns:
try:
covariates[covariate] = covariates.eval(definition)
except pd.core.computation.ops.UndefinedVariableError as e:
if raise_errors:
raise e
else:
has_covariates_left_changed = True
covariates_left = list(covariates_spec)

while has_covariates_left_changed:
n_covariates_left = len(covariates_left)

# Create a copy of `covariates_left` to remove elements without side-effects.
for covariate in copy.copy(covariates_left):
# Check if the covariate does not exist and needs to be computed.
is_covariate_missing = covariate not in covariates.columns

if is_covariate_missing:
try:
covariates[covariate] = covariates.eval(covariates_spec[covariate])
except pd.core.computation.ops.UndefinedVariableError:
pass
else:
covariates_left.remove(covariate)

has_covariates_left_changed = n_covariates_left != len(covariates_left)

if covariates_left and raise_errors:
raise Exception(f"Cannot compute all covariates: {covariates_left}.")

covariates = covariates.drop(columns=states.columns)

Expand Down Expand Up @@ -323,11 +331,16 @@ def convert_labeled_variables_to_codes(df, optim_paras):
return df


def rename_labels_to_internal(x):
    """Shorten a label and convert it to lower-case.

    For example, ``"Experience_A"`` becomes ``"exp_a"``. This is the inverse
    of :func:`rename_labels_from_internal`.

    """
    return x.replace("Experience", "exp").lower()

def rename_labels_from_internal(x):
    """Expand a label and convert it to title-case.

    For example, ``"exp_a"`` becomes ``"Experience_A"``. This is the inverse
    of :func:`rename_labels_to_internal`.

    """
    return x.replace("exp", "Experience").title()


def normalize_probabilities(probabilities):
"""Normalize probabilities such that their sum equals one.
Expand Down
Loading

0 comments on commit 84863f0

Please sign in to comment.