diff --git a/CHANGES.rst b/CHANGES.rst index ad985a090..378c473b7 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -65,6 +65,10 @@ releases are available on `Anaconda.org the data preparation. - :gh:`300` aligns respy functions with the new data in :gh:`299` (:ghuser:`tobiasraabe`). +- :gh:`310` introduces the separation between a core state space and dense dimensions of + the state space, which substantially reduces memory consumption and makes respy scalable. + :gh:`312` and :gh:`313` include changes to the simulation or maximum likelihood + estimation which pave the way for :gh:`310`. (:ghuser:`tobiasraabe`) - :gh:`314` fixes two parameters in KW97 and KW2000 (:ghuser:`tostenzel`, :ghuser:`tobiasraabe`). - :gh:`316` changes the invalid index value for the indexer to prevent silent errors @@ -76,6 +80,10 @@ releases are available on `Anaconda.org - :gh:`323` adds an informative message if simulated individuals cannot be mapped to states in the state space (:ghuser:`mo2561057`, :ghuser:`tobiasraabe`). + +*Releases prior to the second version were published on PyPI but deleted later. You can +still check out the following releases using the corresponding tags.* + 1.2.1 - 2019-05-19 ------------------ diff --git a/LICENSE b/LICENSE index 787231be1..e0454915e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2015-2019 +Copyright (c) 2015-2020 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.rst b/README.rst index b94af6156..e0e67df42 100644 --- a/README.rst +++ b/README.rst @@ -21,7 +21,7 @@ respy .. image:: https://img.shields.io/badge/code%20style-black-000000.svg :target: https://github.com/psf/black - + ``respy`` is an open-source Python package for the simulation and estimation of a prototypical finite-horizon discrete choice dynamic programming model. 
We build on the diff --git a/development/testing/regression.py b/development/testing/regression.py index 1712d94a4..8ffbc89fc 100644 --- a/development/testing/regression.py +++ b/development/testing/regression.py @@ -112,11 +112,13 @@ def _check_single(test, strict): """Check a single test.""" params, options, exp_val = test - crit_val = calc_crit_val(params, options) - - is_success = np.isclose( - crit_val, exp_val, rtol=TOL_REGRESSION_TESTS, atol=TOL_REGRESSION_TESTS - ) + try: + crit_val = calc_crit_val(params, options) + is_success = np.isclose( + crit_val, exp_val, rtol=TOL_REGRESSION_TESTS, atol=TOL_REGRESSION_TESTS + ) + except Exception: + is_success = False if strict is True: assert is_success, "Failed regression test." diff --git a/docs/api.rst b/docs/api.rst index a7c665a91..9852e2672 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,7 +1,6 @@ API === - User ---- @@ -24,6 +23,7 @@ solve .. autosummary:: :toctree: _generated/ + get_solve_func solve simulate @@ -58,31 +58,3 @@ likelihood get_crit_func log_like - - -Developer ---------- - -config -~~~~~~ - -.. currentmodule:: respy.config - -.. autosummary:: - :toctree: _generated/ - - INDEXER_DTYPE - INDEXER_INVALID_INDEX - -state space -~~~~~~~~~~~ - -.. currentmodule:: respy.state_space - -.. autosummary:: - :toctree: _generated/ - - StateSpace - StateSpace.get_attribute_from_period - StateSpace.get_continuation_values - StateSpace.update_systematic_rewards diff --git a/docs/conf.py b/docs/conf.py index 2c29c0137..3f1166f80 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,8 +18,8 @@ def setup(app): # -- Project information ----------------------------------------------------- project = "respy" -copyright = "2015-2019, Philipp Eisenhauer" # noqa: A001 -author = "Philipp Eisenhauer" +copyright = "2015-2020, The respy Development Team" # noqa: A001 +author = "The respy Development Team" # The full version, including alpha/beta/rc tags. 
release = "2.0.0dev2" @@ -49,6 +49,7 @@ def setup(app): autodoc_mock_imports = [ "chaospy", "estimagic", + "joblib", "numba", "numpy", "pandas", @@ -106,7 +107,7 @@ def setup(app): .. nbinfo:: - Download the notebook :download:`here `! + View and download the notebook `here `_! """ @@ -117,7 +118,6 @@ def setup(app): # Configuration for autodoc autosummary_generate = True - # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for diff --git a/docs/development/randomness-and-reproducibility.rst b/docs/development/randomness-and-reproducibility.rst index e36ce5846..ae646116d 100644 --- a/docs/development/randomness-and-reproducibility.rst +++ b/docs/development/randomness-and-reproducibility.rst @@ -59,16 +59,16 @@ parameters. respy.solve ----------- -.. currentmodule:: respy.solve - Routines under ``respy.solve`` use a seed from the sequence initialized by ``options["solution_seed"]`` to control randomness. Apart from the draws, :func:`~respy.solve.solve` relies on the following function. +.. currentmodule:: respy.interpolate + .. autosummary:: :toctree: ../_generated/ - get_not_interpolated_indicator + _get_not_interpolated_indicator respy.simulate diff --git a/docs/development/releases.rst b/docs/development/releases.rst index 82f337c7f..dc75d7f38 100644 --- a/docs/development/releases.rst +++ b/docs/development/releases.rst @@ -24,28 +24,21 @@ How to release a new version? 2. Second, we need to create a final PR to prepare everything for the new version. The name of the PR and the commit message will be "Release vx.y.z". We need to - - update all references of the old version number (``setup.py``, - ``respy/__init__.py``, ``docs/conf.py``). + - use ``bumpversion part `` to increment the correct part of + the version number in all files. - update information in ``CHANGES.rst`` to have summary of the changes which can also be posted in the Github repository under the tag. 
- Merge the PR into master. +3. Run ``python release.py`` and check whether you can actually release a new version. + If you experience errors, fix them here. -3. After that, revisit the draft of the release. Make sure everything is fine. Now, you - click on "Publish release" which creates a version tag on the latest commit of the - specified branch. The tag will trigger a build on Travis-CI which will publish the - release on PypI. - -4. Make sure that the new release was indeed published by checking `PyPI - `_. - -5. Spread the word! +4. Merge the PR into master. +5. After that, revisit the draft of the release. Make sure everything is fine. Now, you + click on "Publish release" which creates a version tag on the latest commit of the + specified branch. Make sure to target the master branch. -Notes ------ +6. Visit `Anaconda.org `_ and check + whether the release is available. -- Travis-CI only builds tags if "Build pushed branches" is active. -- If you publish a release on PyPI, the same version number cannot be reused even if you - delete the release. This is a safety measure. If you are not sure whether the release - will work, create a release candidate instead and publish the real version later. +7. Spread the word! 
diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index 07e620097..51890accf 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -11,6 +11,5 @@ Getting Started tutorial-robinson-crusoe.ipynb tutorial-simulation.ipynb tutorial-initial-conditions - tutorial-model-with-observables.ipynb tutorial-finite-mixture.ipynb tutorial-msm.ipynb diff --git a/docs/getting_started/tutorial-finite-mixture.ipynb b/docs/getting_started/tutorial-finite-mixture.ipynb index a6c29e243..62191c27e 100644 --- a/docs/getting_started/tutorial-finite-mixture.ipynb +++ b/docs/getting_started/tutorial-finite-mixture.ipynb @@ -218,7 +218,7 @@ " \n", " initial_exp_fishing_2\n", " probability\n", - " 0.33\n", + " 0.34\n", " \n", " \n", "\n", @@ -229,7 +229,7 @@ "category name \n", "initial_exp_fishing_0 probability 0.33\n", "initial_exp_fishing_1 probability 0.33\n", - "initial_exp_fishing_2 probability 0.33" + "initial_exp_fishing_2 probability 0.34" ] }, "execution_count": 3, @@ -242,7 +242,7 @@ "category,name,value\n", "initial_exp_fishing_0,probability,0.33\n", "initial_exp_fishing_1,probability,0.33\n", - "initial_exp_fishing_2,probability,0.33\n", + "initial_exp_fishing_2,probability,0.34\n", "\"\"\"), index_col=[\"category\", \"name\"])\n", "initial_exp_fishing" ] @@ -565,7 +565,7 @@ " \n", " initial_exp_fishing_2\n", " probability\n", - " 0.3300\n", + " 0.3400\n", " \n", " \n", " wage_fishing\n", @@ -620,7 +620,7 @@ "lagged_choice_1_hammock constant 1.0000\n", "initial_exp_fishing_0 probability 0.3300\n", "initial_exp_fishing_1 probability 0.3300\n", - "initial_exp_fishing_2 probability 0.3300\n", + "initial_exp_fishing_2 probability 0.3400\n", "wage_fishing type_1 0.2000\n", " type_2 0.4000\n", "type_1 initial_exp_fishing_0 -90.0000\n", @@ -689,18 +689,7 @@ "cell_type": "code", "execution_count": 9, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"C:\\Users\\tobia\\git\\respy\\respy\\pre_processing\\model_processing.py:599: UserWarning: The probabilities for parameter group \\binitial_exp_fishing_([0-9]+)\\b do not sum to one.\n", - " category=UserWarning,\n", - "C:\\Users\\tobia\\git\\respy\\respy\\pre_processing\\model_processing.py:599: UserWarning: The probabilities for parameter group \\binitial_exp_fishing_([0-9]+)\\b do not sum to one.\n", - " category=UserWarning,\n" - ] - } - ], + "outputs": [], "source": [ "simulate = rp.get_simulate_func(params, options)\n", "df = simulate(params)" @@ -753,14 +742,14 @@ " \n", " 1\n", " 0.000000\n", - " 0.659223\n", - " 0.340777\n", + " 0.656887\n", + " 0.343113\n", " \n", " \n", " 2\n", - " 0.000302\n", - " 0.337564\n", - " 0.662134\n", + " 0.000296\n", + " 0.338958\n", + " 0.660746\n", " \n", " \n", "\n", @@ -770,8 +759,8 @@ "Type 0 1 2\n", "Experience_Fishing \n", "0 1.000000 0.000000 0.000000\n", - "1 0.000000 0.659223 0.340777\n", - "2 0.000302 0.337564 0.662134" + "1 0.000000 0.656887 0.343113\n", + "2 0.000296 0.338958 0.660746" ] }, "execution_count": 10, @@ -828,18 +817,18 @@ " \n", " \n", " 0\n", - " 0.438767\n", - " 0.561233\n", + " 0.440916\n", + " 0.559084\n", " \n", " \n", " 1\n", - " 0.590465\n", - " 0.409535\n", + " 0.591656\n", + " 0.408344\n", " \n", " \n", " 2\n", - " 0.705991\n", - " 0.294009\n", + " 0.700927\n", + " 0.299073\n", " \n", " \n", "\n", @@ -848,9 +837,9 @@ "text/plain": [ "Choice fishing hammock\n", "Type \n", - "0 0.438767 0.561233\n", - "1 0.590465 0.409535\n", - "2 0.705991 0.294009" + "0 0.440916 0.559084\n", + "1 0.591656 0.408344\n", + "2 0.700927 0.299073" ] }, "execution_count": 11, @@ -879,7 +868,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/docs/getting_started/tutorial-initial-conditions.ipynb b/docs/getting_started/tutorial-initial-conditions.ipynb index 04a41c865..7a72ccef6 100644 --- 
a/docs/getting_started/tutorial-initial-conditions.ipynb +++ b/docs/getting_started/tutorial-initial-conditions.ipynb @@ -170,7 +170,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -351,7 +351,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -539,9 +539,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\tobia\\git\\respy\\respy\\pre_processing\\model_processing.py:475: UserWarning: The distribution of initial lagged choices is insufficiently specified in the parameters. Covariates require 1 lagged choices and parameters define 0. Missing lags have equiprobable choices.\n", + "C:\\Users\\tobia\\git\\respy\\respy\\pre_processing\\model_processing.py:438: UserWarning: The distribution of initial lagged choices is insufficiently specified in the parameters. Covariates require 1 lagged choices and parameters define 0. Missing lags have equiprobable choices.\n", " category=UserWarning,\n", - "C:\\Users\\tobia\\git\\respy\\respy\\pre_processing\\model_processing.py:475: UserWarning: The distribution of initial lagged choices is insufficiently specified in the parameters. Covariates require 1 lagged choices and parameters define 0. Missing lags have equiprobable choices.\n", + "C:\\Users\\tobia\\git\\respy\\respy\\pre_processing\\model_processing.py:438: UserWarning: The distribution of initial lagged choices is insufficiently specified in the parameters. Covariates require 1 lagged choices and parameters define 0. Missing lags have equiprobable choices.\n", " category=UserWarning,\n" ] } @@ -555,6 +555,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "The warning is raised because we forgot to specify what individuals in period 0 have been doing in the previous period. As a default, `respy` assumes that all choices have the same probability for being the previous choice. Note that, it might lead to inconsistent states where an individual should have accumulated experience in the previous period, but still starts with zero experience.\n", + "\n", "Below we see the choice probabilities on the left-hand-side and the shares of lagged choices on the right-hand-side. 
Without further information, ``respy`` makes all previous choices in the first period equiprobable.\n", "\n", "If we had set the covariate with the lagged choice but not added a parameter using the covariate, ``respy`` would have discarded the covariate and created a model without lagged choices." @@ -567,7 +569,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -839,7 +841,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -1097,7 +1099,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -1157,7 +1159,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/docs/getting_started/tutorial-model-with-observables.ipynb b/docs/getting_started/tutorial-model-with-observables.ipynb deleted file mode 100644 index 75da07859..000000000 --- a/docs/getting_started/tutorial-model-with-observables.ipynb +++ /dev/null @@ -1,788 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Implementing a model with observed state space components\n", - "\n", - "This notebook shows how to introduce observable characteristics of an individual to the state space. A potential source of unobserved heterogeneity in the models of Keane and Wolpin (1994) and Keane and Wolpin (1997) stems from the fact that individual ability is not observed. The authors try to mitigate the influence by using a finite fixture model with four different types as the years of schooling at the start of the model horizon are potentially not exogenous. If we had data on ability, we could probably shift some of the explanatory power of types to an ability covariate. Furthermore, making type probabilities dependent on the ability level, types become more economically interpretable." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import respy as rp" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Parameters, options and data\n", - "\n", - "As we have no ability measure in the original data of Keane and Wolpin (1997), we assume that the initial years of schooling serve as a five point scale ability measure. The measure should start at 0 which makes it more suitable to the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\tobia\\git\\respy\\respy\\pre_processing\\model_processing.py:204: UserWarning: The shares of initial experiences for choice 'school' do not sum to one. Shares are divided by their sum for normalization.\n", - " category=UserWarning,\n" - ] - } - ], - "source": [ - "params, options, df = rp.get_example_model(\"kw_97_base\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "df[\"Ability\"] = (\n", - " df.groupby(\"Identifier\").Experience_School.transform(\"first\")\n", - " .subtract(7)\n", - " .astype(np.uint8)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Furthermore, we include covariates of our new ability measure in the parameter specification and define the covariates in the options. For simplification, we treat our ability measure similar to IQ which was originally used to determine basic mental capabilities. Our single covariate is thus having an ability level higher than zero. Still, we keep the five point scale instead of a simpler two point scale to determine the impact on the size of the state space.\n", - "\n", - "The new covariate enters the wage component of working alternatives and the non-pecuniary component of non-working alternatives positively. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Add ability parameters to wage components.\n", - "for category in [\"wage_white_collar\", \"wage_blue_collar\", \"wage_military\"]:\n", - " params.loc[(category, \"at_least_one_ability\"), :] = [\n", - " 0.1, \"return to having at least ability level one\"\n", - " ]\n", - "\n", - "# Add ability parameters to non-pecuniary components.\n", - "for category in [\"nonpec_school\", \"nonpec_home\"]:\n", - " params.loc[(category, \"at_least_one_ability\"), :] = [\n", - " 2000, \"return to having at least ability level one\"\n", - " ]\n", - "\n", - "# Add ability parameters to type proobabilities.\n", - "for category in [\"type_2\", \"type_3\", \"type_4\"]:\n", - " params.loc[(category, \"at_least_one_ability\"), :] = [\n", - " 0.1, \"return to having at least ability level one\"\n", - " ]\n", - "\n", - "# Define the probability for ability levels for the simulation.\n", - "for name, val in zip(\n", - " [f\"ability_{i}\" for i in range(5)], [0.0098, 0.0431, 0.201, 0.6702, 0.0759]\n", - "):\n", - " params.loc[(\"observables\", name), :] = [\n", - " val, \"Probability of having the specified ability level\"\n", - " ]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As the ``\"name\"`` column in the parameter dataframe is matched to covariates, we have to define ``\"at_least_one_ability\"``. The string under ``options[\"covariates\"][\"at_least_one_ability\"]`` is evaluated using [pandas.DataFrame.eval()](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.eval.html). Under ``options[\"observables\"][\"ability\"]`` we store the range of ability levels which is five." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "options[\"covariates\"][\"at_least_one_ability\"] = \"ability >= 1\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# For simplification we restrict the model to 11 periods.\n", - "options[\"n_periods\"] = 11" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, we solve the model." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "state_space = rp.solve(params, options)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
valuecomment
categoryname
deltadelta0.7870discount factor
wage_white_collarconstant8.8043log of rental price if the base skill endowmen...
exp_school0.0938linear return to an additional year of schooli...
exp_white_collar0.1170return to experience, same sector, linear (wage)
exp_white_collar_square-0.0461return to experience, same sector, quadratic (...
............
observablesability_00.0098Probability of having the specified ability level
ability_10.0431Probability of having the specified ability level
ability_20.2010Probability of having the specified ability level
ability_30.6702Probability of having the specified ability level
ability_40.0759Probability of having the specified ability level
\n", - "

79 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " value \\\n", - "category name \n", - "delta delta 0.7870 \n", - "wage_white_collar constant 8.8043 \n", - " exp_school 0.0938 \n", - " exp_white_collar 0.1170 \n", - " exp_white_collar_square -0.0461 \n", - "... ... \n", - "observables ability_0 0.0098 \n", - " ability_1 0.0431 \n", - " ability_2 0.2010 \n", - " ability_3 0.6702 \n", - " ability_4 0.0759 \n", - "\n", - " comment \n", - "category name \n", - "delta delta discount factor \n", - "wage_white_collar constant log of rental price if the base skill endowmen... \n", - " exp_school linear return to an additional year of schooli... \n", - " exp_white_collar return to experience, same sector, linear (wage) \n", - " exp_white_collar_square return to experience, same sector, quadratic (... \n", - "... ... \n", - "observables ability_0 Probability of having the specified ability level \n", - " ability_1 Probability of having the specified ability level \n", - " ability_2 Probability of having the specified ability level \n", - " ability_3 Probability of having the specified ability level \n", - " ability_4 Probability of having the specified ability level \n", - "\n", - "[79 rows x 2 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "params" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, we calculate the likelihood value of the data." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\tobia\\git\\respy\\respy\\pre_processing\\model_processing.py:204: UserWarning: The shares of initial experiences for choice 'school' do not sum to one. 
Shares are divided by their sum for normalization.\n", - " category=UserWarning,\n" - ] - } - ], - "source": [ - "criterion = rp.get_crit_func(params, options, df)\n", - "crit_val = criterion(params)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "-55.39840900556615" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "crit_val" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, we simulate a new data set given the proportions of ability levels specified in ``params`` under ``\"ability\"``." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "simulate = rp.get_simulate_func(params, options)\n", - "df = simulate(params)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0.2, 0.4, 0.6])" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[1, 2, 3] / np.sum(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
IdentifierPeriodChoiceWageExperience_Blue_CollarExperience_MilitaryExperience_White_CollarExperience_SchoolAbilityType
000schoolNaN0001032
101white_collar12175.7332090001132
202white_collar28774.1435650011132
303blue_collar18002.6347570021132
404schoolNaN1021132
505white_collar39528.7383781021232
606white_collar35261.9993111031232
707homeNaN1041232
808white_collar24256.3516581041232
909homeNaN1051232
10010white_collar46614.2179061051232
1110schoolNaN000912
1211schoolNaN0001012
1312schoolNaN0001112
1413white_collar10299.3120690001212
1514white_collar13755.1999800011212
1615blue_collar14896.2429010021212
1716white_collar45155.8142381021212
1817white_collar36767.6479131031212
1918white_collar35646.1021281041212
\n", - "
" - ], - "text/plain": [ - " Identifier Period Choice Wage Experience_Blue_Collar \\\n", - "0 0 0 school NaN 0 \n", - "1 0 1 white_collar 12175.733209 0 \n", - "2 0 2 white_collar 28774.143565 0 \n", - "3 0 3 blue_collar 18002.634757 0 \n", - "4 0 4 school NaN 1 \n", - "5 0 5 white_collar 39528.738378 1 \n", - "6 0 6 white_collar 35261.999311 1 \n", - "7 0 7 home NaN 1 \n", - "8 0 8 white_collar 24256.351658 1 \n", - "9 0 9 home NaN 1 \n", - "10 0 10 white_collar 46614.217906 1 \n", - "11 1 0 school NaN 0 \n", - "12 1 1 school NaN 0 \n", - "13 1 2 school NaN 0 \n", - "14 1 3 white_collar 10299.312069 0 \n", - "15 1 4 white_collar 13755.199980 0 \n", - "16 1 5 blue_collar 14896.242901 0 \n", - "17 1 6 white_collar 45155.814238 1 \n", - "18 1 7 white_collar 36767.647913 1 \n", - "19 1 8 white_collar 35646.102128 1 \n", - "\n", - " Experience_Military Experience_White_Collar Experience_School Ability \\\n", - "0 0 0 10 3 \n", - "1 0 0 11 3 \n", - "2 0 1 11 3 \n", - "3 0 2 11 3 \n", - "4 0 2 11 3 \n", - "5 0 2 12 3 \n", - "6 0 3 12 3 \n", - "7 0 4 12 3 \n", - "8 0 4 12 3 \n", - "9 0 5 12 3 \n", - "10 0 5 12 3 \n", - "11 0 0 9 1 \n", - "12 0 0 10 1 \n", - "13 0 0 11 1 \n", - "14 0 0 12 1 \n", - "15 0 1 12 1 \n", - "16 0 2 12 1 \n", - "17 0 2 12 1 \n", - "18 0 3 12 1 \n", - "19 0 4 12 1 \n", - "\n", - " Type \n", - "0 2 \n", - "1 2 \n", - "2 2 \n", - "3 2 \n", - "4 2 \n", - "5 2 \n", - "6 2 \n", - "7 2 \n", - "8 2 \n", - "9 2 \n", - "10 2 \n", - "11 2 \n", - "12 2 \n", - "13 2 \n", - "14 2 \n", - "15 2 \n", - "16 2 \n", - "17 2 \n", - "18 2 \n", - "19 2 " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.loc[:, :\"Type\"].head(20)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## References\n", - "\n", - "> Keane, M. P. and Wolpin, K. I. (1994). 
[The Solution and Estimation of Discrete Choice Dynamic Programming Models by Simulation and Interpolation: Monte Carlo Evidence](https://doi.org/10.2307/2109768). *Federal Reserve Bank of Minneapolis*, No. 181.\n", - ">\n", - "> Keane, M. P. and Wolpin, K. I. (1997). [The Career Decisions of Young Men](https://doi.org/10.1086/262080>). *Journal of Political Economy*, 105(3): 473-522." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/getting_started/tutorial-msm.ipynb b/docs/getting_started/tutorial-msm.ipynb index a87dccebf..f448ac74a 100644 --- a/docs/getting_started/tutorial-msm.ipynb +++ b/docs/getting_started/tutorial-msm.ipynb @@ -22,10 +22,7 @@ "outputs": [], "source": [ "import pandas as pd \n", - "import respy as rp\n", - "\n", - "from respy.method_of_simulated_moments import get_diag_weighting_matrix\n", - "from respy.method_of_simulated_moments import get_flat_moments" + "import respy as rp" ] }, { @@ -413,7 +410,7 @@ " 'simulation_agents': 1000,\n", " 'simulation_seed': 132,\n", " 'solution_draws': 500,\n", - " 'solution_seed': 1,\n", + " 'solution_seed': 3,\n", " 'monte_carlo_sequence': 'random',\n", " 'core_state_space_filters': [\"period > 0 and exp_{i} == period and lagged_choice_1 != '{i}'\",\n", " \"period > 0 and exp_a + exp_b + exp_edu == period and lagged_choice_1 == '{j}'\",\n", @@ -584,48 +581,48 @@ " \n", " \n", " 0\n", - " 0.442\n", + " 0.409\n", " 0.093\n", - " 0.453\n", - " 0.012\n", - " 16786.328202\n", - " 2733.477206\n", + " 0.488\n", + " 0.010\n", + " 16930.457137\n", + " 2682.867279\n", " \n", " \n", " 1\n", - " 0.474\n", - " 0.184\n", - " 
0.293\n", - " 0.049\n", - " 16176.825079\n", - " 3138.869021\n", + " 0.435\n", + " 0.188\n", + " 0.341\n", + " 0.036\n", + " 16286.178001\n", + " 3120.941623\n", " \n", " \n", " 2\n", - " 0.490\n", - " 0.228\n", - " 0.244\n", + " 0.460\n", + " 0.238\n", + " 0.264\n", " 0.038\n", - " 16249.622976\n", - " 3267.884944\n", + " 16578.402709\n", + " 3385.400488\n", " \n", " \n", " 3\n", - " 0.473\n", - " 0.261\n", - " 0.227\n", - " 0.039\n", - " 16800.603663\n", - " 3588.776974\n", + " 0.445\n", + " 0.279\n", + " 0.244\n", + " 0.032\n", + " 16608.887328\n", + " 3461.618469\n", " \n", " \n", " 4\n", - " 0.473\n", - " 0.292\n", - " 0.197\n", - " 0.038\n", - " 16797.209380\n", - " 3549.389371\n", + " 0.423\n", + " 0.322\n", + " 0.220\n", + " 0.035\n", + " 16775.353763\n", + " 3542.164993\n", " \n", " \n", "\n", @@ -634,11 +631,11 @@ "text/plain": [ " a b edu home mean std\n", "Period \n", - "0 0.442 0.093 0.453 0.012 16786.328202 2733.477206\n", - "1 0.474 0.184 0.293 0.049 16176.825079 3138.869021\n", - "2 0.490 0.228 0.244 0.038 16249.622976 3267.884944\n", - "3 0.473 0.261 0.227 0.039 16800.603663 3588.776974\n", - "4 0.473 0.292 0.197 0.038 16797.209380 3549.389371" + "0 0.409 0.093 0.488 0.010 16930.457137 2682.867279\n", + "1 0.435 0.188 0.341 0.036 16286.178001 3120.941623\n", + "2 0.460 0.238 0.264 0.038 16578.402709 3385.400488\n", + "3 0.445 0.279 0.244 0.032 16608.887328 3461.618469\n", + "4 0.423 0.322 0.220 0.035 16775.353763 3542.164993" ] }, "execution_count": 8, @@ -678,7 +675,7 @@ }, "outputs": [], "source": [ - "weighting_matrix = get_diag_weighting_matrix(empirical_moments)" + "weighting_matrix = rp.get_diag_weighting_matrix(empirical_moments)" ] }, { @@ -1068,17 +1065,17 @@ { "data": { "text/plain": [ - "a_0 0.442000\n", - "a_1 0.474000\n", - "a_2 0.490000\n", - "a_3 0.473000\n", - "a_4 0.473000\n", + "a_0 0.409000\n", + "a_1 0.435000\n", + "a_2 0.460000\n", + "a_3 0.445000\n", + "a_4 0.423000\n", " ... 
\n", - "std_35 13217.343152\n", - "std_36 12693.606573\n", - "std_37 13016.135077\n", - "std_38 13314.644165\n", - "std_39 13567.728207\n", + "std_35 12756.763553\n", + "std_36 12146.297978\n", + "std_37 13293.128148\n", + "std_38 13236.160871\n", + "std_39 13933.439538\n", "Length: 240, dtype: float64" ] }, @@ -1088,7 +1085,7 @@ } ], "source": [ - "flat_empirical_moments = get_flat_moments(empirical_moments)\n", + "flat_empirical_moments = rp.get_flat_moments(empirical_moments)\n", "flat_empirical_moments" ] }, @@ -1199,7 +1196,7 @@ { "data": { "text/plain": [ - "3261729527.428732" + "3603224859.124363" ] }, "execution_count": 14, @@ -1235,17 +1232,17 @@ { "data": { "text/plain": [ - "a_0 0.174000\n", - "a_1 0.200000\n", - "a_2 0.207000\n", - "a_3 0.144000\n", - "a_4 0.137000\n", + "a_0 0.130000\n", + "a_1 0.152000\n", + "a_2 0.151000\n", + "a_3 0.136000\n", + "a_4 0.095000\n", " ... \n", - "std_35 8710.017774\n", - "std_36 7907.238080\n", - "std_37 8275.432439\n", - "std_38 8738.437200\n", - "std_39 8713.130254\n", + "std_35 8112.352899\n", + "std_36 7292.742834\n", + "std_37 8414.066029\n", + "std_38 8509.249336\n", + "std_39 9398.821013\n", "Length: 240, dtype: float64" ] }, @@ -1364,8 +1361,8 @@ }, "outputs": [], "source": [ - "params, options, df_emp = rp.get_example_model(\"kw_94_one\")\n", - "empirical_moments = [calc_choice_freq(df_emp), calc_wage_distr(df_emp), calc_final_edu(df_emp)]\n", + "params, options, df = rp.get_example_model(\"kw_94_one\")\n", + "empirical_moments = [calc_choice_freq(df), calc_wage_distr(df), calc_final_edu(df)]\n", "\n", "empirical_moments = [fill_nans_zero(df) for df in empirical_moments]" ] @@ -1384,7 +1381,7 @@ }, "outputs": [], "source": [ - "weighting_matrix = get_diag_weighting_matrix(empirical_moments)" + "weighting_matrix = rp.get_diag_weighting_matrix(empirical_moments)" ] }, { @@ -1455,7 +1452,7 @@ { "data": { "text/plain": [ - "3261729527.8596244" + "3603224859.597081" ] }, "execution_count": 21, @@ -1483,16 +1480,16 
@@ { "data": { "text/plain": [ - "a_0 0.174\n", - "a_1 0.200\n", - "a_2 0.207\n", - "a_3 0.144\n", - "a_4 0.137\n", + "a_0 0.130\n", + "a_1 0.152\n", + "a_2 0.151\n", + "a_3 0.136\n", + "a_4 0.095\n", " ... \n", - "Experience_Edu_16 0.078\n", - "Experience_Edu_17 0.052\n", - "Experience_Edu_18 0.029\n", - "Experience_Edu_19 0.016\n", + "Experience_Edu_16 0.075\n", + "Experience_Edu_17 0.059\n", + "Experience_Edu_18 0.051\n", + "Experience_Edu_19 0.020\n", "Experience_Edu_20 0.010\n", "Length: 251, dtype: float64" ] @@ -1524,7 +1521,7 @@ "\n", "> Keane, M. P. and Wolpin, K. I. (1994). [The Solution and Estimation of Discrete Choice Dynamic Programming Models by Simulation and Interpolation: Monte Carlo Evidence](https://doi.org/10.2307/2109768). *The Review of Economics and Statistics*, 76(4): 648-672.\n", "\n", - "> McFadden, D. (1989). [A Method of Simulated Moments for Estimation of Discrete Response Models without Numerical Integration](https://www.jstor.org/stable/1913621?seq=1). *Econometrica: Journal of the Econometric Society*, 995-1026.\n" + "> McFadden, D. (1989). [A Method of Simulated Moments for Estimation of Discrete Response Models without Numerical Integration](https://jstor.org/stable/1913621). 
*Econometrica: Journal of the Econometric Society*, 995-1026.\n" ] } ], diff --git a/docs/getting_started/tutorial-robinson-crusoe.ipynb b/docs/getting_started/tutorial-robinson-crusoe.ipynb index 24fd6b623..b4d81c928 100644 --- a/docs/getting_started/tutorial-robinson-crusoe.ipynb +++ b/docs/getting_started/tutorial-robinson-crusoe.ipynb @@ -347,415 +347,532 @@ " \n", " \n", " \n", - " Identifier\n", - " Period\n", - " Choice\n", - " Wage\n", + " \n", " Experience_Fishing\n", " Lagged_Choice_1\n", - " Type\n", + " Shock_Reward_Fishing\n", + " Meas_Error_Wage_Fishing\n", + " Shock_Reward_Hammock\n", + " Meas_Error_Wage_Hammock\n", + " Choice\n", + " Wage\n", + " Discount_Rate\n", " Nonpecuniary_Reward_Fishing\n", - " Nonpecuniary_Reward_Hammock\n", " Wage_Fishing\n", " Flow_Utility_Fishing\n", - " Flow_Utility_Hammock\n", " Value_Function_Fishing\n", + " Continuation_Value_Fishing\n", + " Nonpecuniary_Reward_Hammock\n", + " Wage_Hammock\n", + " Flow_Utility_Hammock\n", " Value_Function_Hammock\n", - " Shock_Reward_Fishing\n", - " Shock_Reward_Hammock\n", - " Discount_Rate\n", + " Continuation_Value_Hammock\n", + " \n", + " \n", + " Identifier\n", + " Period\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", + " 0\n", " 0\n", " 0\n", - " 0\n", - " fishing\n", - " 2.048628\n", - " 0\n", " hammock\n", - " 0\n", - " -1.0\n", - " 1.5\n", - " 1.000000\n", - " 1.048628\n", - " 2.366250\n", - " 19.630892\n", - " 19.336065\n", " 2.048628\n", + " 1\n", " 0.866250\n", + " 1\n", + " fishing\n", + " 2.048628\n", " 0.95\n", + " -1\n", + " 2.048628\n", + " 1.048628\n", + " 19.513202\n", + " 19.436393\n", + " 1.5\n", + " NaN\n", + " 2.366250\n", + " 19.233744\n", + " 17.755256\n", " \n", " \n", " 1\n", - " 0\n", " 1\n", " fishing\n", - " 2.814534\n", + " 0.147087\n", " 1\n", - " fishing\n", - " 0\n", - " -1.0\n", - " 2.5\n", - " 
1.105171\n", - " 1.814534\n", - " 2.317108\n", - " 19.667179\n", - " 18.563510\n", - " 2.546696\n", - " -0.182892\n", + " 1.421523\n", + " 1\n", + " hammock\n", + " NaN\n", " 0.95\n", + " -1\n", + " 0.162556\n", + " -0.837444\n", + " 16.864110\n", + " 18.633215\n", + " 2.5\n", + " NaN\n", + " 3.921523\n", + " 20.032537\n", + " 16.958962\n", " \n", " \n", " 2\n", - " 0\n", - " 2\n", - " fishing\n", - " 3.943674\n", - " 2\n", + " 1\n", + " hammock\n", + " 0.903027\n", + " 1\n", + " -0.351595\n", + " 1\n", " fishing\n", - " 0\n", - " -1.0\n", - " 2.5\n", - " 1.221403\n", - " 2.943674\n", - " 2.974046\n", - " 19.839016\n", - " 18.322118\n", - " 3.228807\n", - " 0.474046\n", + " 0.998000\n", " 0.95\n", + " -1\n", + " 0.998000\n", + " -0.002000\n", + " 15.655857\n", + " 16.481955\n", + " 1.5\n", + " NaN\n", + " 1.148405\n", + " 15.352842\n", + " 14.952039\n", " \n", " \n", " 3\n", - " 0\n", - " 3\n", + " 2\n", + " fishing\n", + " 0.339405\n", + " 1\n", + " -0.930422\n", + " 1\n", " hammock\n", " NaN\n", - " 3\n", - " fishing\n", - " 0\n", - " -1.0\n", - " 2.5\n", - " 1.349859\n", - " 1.096816\n", - " 2.676218\n", - " 16.868733\n", - " 16.916543\n", - " 1.553359\n", - " 0.176218\n", " 0.95\n", + " -1\n", + " 0.414550\n", + " -0.585450\n", + " 13.941609\n", + " 15.291641\n", + " 2.5\n", + " NaN\n", + " 1.569578\n", + " 14.680286\n", + " 13.800745\n", " \n", " \n", " 4\n", - " 0\n", - " 4\n", - " hammock\n", - " NaN\n", - " 3\n", + " 2\n", " hammock\n", - " 0\n", - " -1.0\n", - " 1.5\n", - " 1.349859\n", - " -0.875144\n", - " 0.493640\n", - " 12.590524\n", - " 12.611304\n", - " 0.092495\n", - " -1.006360\n", + " 2.822820\n", + " 1\n", + " -0.420713\n", + " 1\n", + " fishing\n", + " 3.447800\n", " 0.95\n", + " -1\n", + " 3.447800\n", + " 2.447800\n", + " 14.749573\n", + " 12.949235\n", + " 1.5\n", + " NaN\n", + " 1.079287\n", + " 12.114100\n", + " 11.615592\n", " \n", " \n", " 5\n", - " 0\n", - " 5\n", - " hammock\n", - " NaN\n", " 3\n", + " fishing\n", + " 2.015148\n", + " 
1\n", + " 2.056790\n", + " 1\n", " hammock\n", - " 0\n", - " -1.0\n", - " 1.5\n", - " 1.349859\n", - " -0.366008\n", - " 1.080653\n", - " 10.680733\n", - " 10.904368\n", - " 0.469673\n", - " -0.419347\n", + " NaN\n", " 0.95\n", + " -1\n", + " 2.720165\n", + " 1.720165\n", + " 12.341990\n", + " 11.180868\n", + " 2.5\n", + " NaN\n", + " 4.556790\n", + " 13.991270\n", + " 9.931031\n", " \n", " \n", " 6\n", - " 0\n", - " 6\n", - " fishing\n", - " 2.475731\n", " 3\n", " hammock\n", - " 0\n", - " -1.0\n", - " 1.5\n", - " 1.349859\n", - " 1.475731\n", - " 0.501567\n", - " 9.867073\n", - " 7.852518\n", - " 1.834067\n", - " -0.998433\n", + " 5.802097\n", + " 1\n", + " -0.090973\n", + " 1\n", + " fishing\n", + " 7.832012\n", " 0.95\n", + " -1\n", + " 7.832012\n", + " 6.832012\n", + " 14.963155\n", + " 8.559098\n", + " 1.5\n", + " NaN\n", + " 1.409027\n", + " 8.503160\n", + " 7.467509\n", " \n", " \n", " 7\n", - " 0\n", - " 7\n", - " hammock\n", - " NaN\n", " 4\n", " fishing\n", - " 0\n", - " -1.0\n", - " 2.5\n", - " 1.491825\n", - " -0.099928\n", - " 2.178192\n", - " 6.065739\n", - " 7.419904\n", - " 0.603336\n", - " -0.321808\n", + " 0.429942\n", + " 1\n", + " 1.443708\n", + " 1\n", + " hammock\n", + " NaN\n", " 0.95\n", + " -1\n", + " 0.641398\n", + " -0.358602\n", + " 5.609535\n", + " 6.282250\n", + " 2.5\n", + " NaN\n", + " 3.943708\n", + " 9.014635\n", + " 5.337817\n", " \n", " \n", " 8\n", - " 0\n", - " 8\n", - " hammock\n", - " NaN\n", " 4\n", " hammock\n", - " 0\n", - " -1.0\n", - " 1.5\n", - " 1.491825\n", - " 0.164102\n", - " 1.257459\n", - " 3.413636\n", - " 3.665488\n", - " 0.780321\n", - " -0.242541\n", + " 0.216153\n", + " 1\n", + " -0.298857\n", + " 1\n", + " hammock\n", + " NaN\n", " 0.95\n", + " -1\n", + " 0.322463\n", + " -0.677537\n", + " 2.612370\n", + " 3.463061\n", + " 1.5\n", + " NaN\n", + " 1.201143\n", + " 3.663504\n", + " 2.591958\n", " \n", " \n", " 9\n", - " 0\n", - " 9\n", - " fishing\n", - " 10.234951\n", " 4\n", " hammock\n", - " 0\n", - " 
-1.0\n", - " 1.5\n", - " 1.491825\n", - " 9.234951\n", - " 1.501487\n", - " 9.234951\n", - " 1.501487\n", - " 6.860693\n", - " 0.001487\n", + " 7.604617\n", + " 1\n", + " -0.666748\n", + " 1\n", + " fishing\n", + " 11.344756\n", " 0.95\n", + " -1\n", + " 11.344756\n", + " 10.344756\n", + " 10.344756\n", + " 0.000000\n", + " 1.5\n", + " NaN\n", + " 0.833252\n", + " 0.833252\n", + " 0.000000\n", " \n", " \n", - " 10\n", - " 1\n", - " 0\n", - " hammock\n", - " NaN\n", + " 1\n", + " 0\n", " 0\n", " hammock\n", - " 0\n", - " -1.0\n", - " 1.5\n", - " 1.000000\n", - " -0.852913\n", - " 2.921523\n", - " 17.729351\n", - " 19.891338\n", - " 0.147087\n", - " 1.421523\n", + " 1.247475\n", + " 1\n", + " -2.189594\n", + " 1\n", + " fishing\n", + " 1.247475\n", " 0.95\n", + " -1\n", + " 1.247475\n", + " 0.247475\n", + " 18.712048\n", + " 19.436393\n", + " 1.5\n", + " NaN\n", + " -0.689594\n", + " 16.177899\n", + " 17.755256\n", " \n", " \n", - " 11\n", - " 1\n", + " 1\n", " 1\n", " fishing\n", - " 8.465755\n", - " 0\n", + " 0.802887\n", + " 1\n", + " 0.931514\n", + " 1\n", " hammock\n", - " 0\n", - " -1.0\n", - " 1.5\n", - " 1.000000\n", - " 7.465755\n", - " 0.400790\n", - " 24.238470\n", - " 15.680288\n", - " 8.465755\n", - " -1.099210\n", + " NaN\n", " 0.95\n", + " -1\n", + " 0.887327\n", + " -0.112673\n", + " 17.588881\n", + " 18.633215\n", + " 2.5\n", + " NaN\n", + " 3.431514\n", + " 19.542528\n", + " 16.958962\n", " \n", " \n", - " 12\n", + " 2\n", " 1\n", - " 2\n", " hammock\n", - " NaN\n", + " 0.213228\n", " 1\n", - " fishing\n", - " 0\n", - " -1.0\n", - " 2.5\n", - " 1.105171\n", - " 0.024021\n", - " 3.110792\n", - " 15.853940\n", - " 17.503773\n", - " 0.926573\n", - " 0.610792\n", + " 1.803811\n", + " 1\n", + " hammock\n", + " NaN\n", " 0.95\n", + " -1\n", + " 0.235653\n", + " -0.764347\n", + " 14.893510\n", + " 16.481955\n", + " 1.5\n", + " NaN\n", + " 3.303811\n", + " 17.508248\n", + " 14.952039\n", " \n", " \n", - " 13\n", + " 3\n", " 1\n", - " 3\n", - " fishing\n", - 
" 0.592936\n", + " hammock\n", + " 1.824770\n", + " 1\n", + " 0.998793\n", " 1\n", " hammock\n", - " 0\n", - " -1.0\n", - " 1.5\n", - " 1.105171\n", - " -0.407064\n", - " 0.844335\n", - " 13.453224\n", - " 13.362810\n", - " 0.536511\n", - " -0.655665\n", + " NaN\n", " 0.95\n", + " -1\n", + " 2.016682\n", + " 1.016682\n", + " 14.671037\n", + " 14.373005\n", + " 1.5\n", + " NaN\n", + " 2.498793\n", + " 14.820062\n", + " 12.969757\n", " \n", " \n", - " 14\n", + " 4\n", " 1\n", - " 4\n", - " fishing\n", - " 4.140511\n", - " 2\n", - " fishing\n", - " 0\n", - " -1.0\n", - " 2.5\n", - " 1.221403\n", - " 3.140511\n", - " 2.861062\n", - " 15.765453\n", - " 14.212800\n", - " 3.389964\n", - " 0.361062\n", + " hammock\n", + " 0.602507\n", + " 1\n", + " 0.905523\n", + " 1\n", + " hammock\n", + " NaN\n", " 0.95\n", + " -1\n", + " 0.665873\n", + " -0.334127\n", + " 11.258946\n", + " 12.203234\n", + " 1.5\n", + " NaN\n", + " 2.405523\n", + " 12.794276\n", + " 10.935529\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Identifier Period Choice Wage Experience_Fishing \\\n", - "0 0 0 fishing 2.048628 0 \n", - "1 0 1 fishing 2.814534 1 \n", - "2 0 2 fishing 3.943674 2 \n", - "3 0 3 hammock NaN 3 \n", - "4 0 4 hammock NaN 3 \n", - "5 0 5 hammock NaN 3 \n", - "6 0 6 fishing 2.475731 3 \n", - "7 0 7 hammock NaN 4 \n", - "8 0 8 hammock NaN 4 \n", - "9 0 9 fishing 10.234951 4 \n", - "10 1 0 hammock NaN 0 \n", - "11 1 1 fishing 8.465755 0 \n", - "12 1 2 hammock NaN 1 \n", - "13 1 3 fishing 0.592936 1 \n", - "14 1 4 fishing 4.140511 2 \n", + " Experience_Fishing Lagged_Choice_1 Shock_Reward_Fishing \\\n", + "Identifier Period \n", + "0 0 0 hammock 2.048628 \n", + " 1 1 fishing 0.147087 \n", + " 2 1 hammock 0.903027 \n", + " 3 2 fishing 0.339405 \n", + " 4 2 hammock 2.822820 \n", + " 5 3 fishing 2.015148 \n", + " 6 3 hammock 5.802097 \n", + " 7 4 fishing 0.429942 \n", + " 8 4 hammock 0.216153 \n", + " 9 4 hammock 7.604617 \n", + "1 0 0 hammock 1.247475 \n", + " 1 1 fishing 0.802887 \n", + " 
2 1 hammock 0.213228 \n", + " 3 1 hammock 1.824770 \n", + " 4 1 hammock 0.602507 \n", + "\n", + " Meas_Error_Wage_Fishing Shock_Reward_Hammock \\\n", + "Identifier Period \n", + "0 0 1 0.866250 \n", + " 1 1 1.421523 \n", + " 2 1 -0.351595 \n", + " 3 1 -0.930422 \n", + " 4 1 -0.420713 \n", + " 5 1 2.056790 \n", + " 6 1 -0.090973 \n", + " 7 1 1.443708 \n", + " 8 1 -0.298857 \n", + " 9 1 -0.666748 \n", + "1 0 1 -2.189594 \n", + " 1 1 0.931514 \n", + " 2 1 1.803811 \n", + " 3 1 0.998793 \n", + " 4 1 0.905523 \n", + "\n", + " Meas_Error_Wage_Hammock Choice Wage Discount_Rate \\\n", + "Identifier Period \n", + "0 0 1 fishing 2.048628 0.95 \n", + " 1 1 hammock NaN 0.95 \n", + " 2 1 fishing 0.998000 0.95 \n", + " 3 1 hammock NaN 0.95 \n", + " 4 1 fishing 3.447800 0.95 \n", + " 5 1 hammock NaN 0.95 \n", + " 6 1 fishing 7.832012 0.95 \n", + " 7 1 hammock NaN 0.95 \n", + " 8 1 hammock NaN 0.95 \n", + " 9 1 fishing 11.344756 0.95 \n", + "1 0 1 fishing 1.247475 0.95 \n", + " 1 1 hammock NaN 0.95 \n", + " 2 1 hammock NaN 0.95 \n", + " 3 1 hammock NaN 0.95 \n", + " 4 1 hammock NaN 0.95 \n", + "\n", + " Nonpecuniary_Reward_Fishing Wage_Fishing \\\n", + "Identifier Period \n", + "0 0 -1 2.048628 \n", + " 1 -1 0.162556 \n", + " 2 -1 0.998000 \n", + " 3 -1 0.414550 \n", + " 4 -1 3.447800 \n", + " 5 -1 2.720165 \n", + " 6 -1 7.832012 \n", + " 7 -1 0.641398 \n", + " 8 -1 0.322463 \n", + " 9 -1 11.344756 \n", + "1 0 -1 1.247475 \n", + " 1 -1 0.887327 \n", + " 2 -1 0.235653 \n", + " 3 -1 2.016682 \n", + " 4 -1 0.665873 \n", "\n", - " Lagged_Choice_1 Type Nonpecuniary_Reward_Fishing \\\n", - "0 hammock 0 -1.0 \n", - "1 fishing 0 -1.0 \n", - "2 fishing 0 -1.0 \n", - "3 fishing 0 -1.0 \n", - "4 hammock 0 -1.0 \n", - "5 hammock 0 -1.0 \n", - "6 hammock 0 -1.0 \n", - "7 fishing 0 -1.0 \n", - "8 hammock 0 -1.0 \n", - "9 hammock 0 -1.0 \n", - "10 hammock 0 -1.0 \n", - "11 hammock 0 -1.0 \n", - "12 fishing 0 -1.0 \n", - "13 hammock 0 -1.0 \n", - "14 fishing 0 -1.0 \n", + " Flow_Utility_Fishing 
Value_Function_Fishing \\\n", + "Identifier Period \n", + "0 0 1.048628 19.513202 \n", + " 1 -0.837444 16.864110 \n", + " 2 -0.002000 15.655857 \n", + " 3 -0.585450 13.941609 \n", + " 4 2.447800 14.749573 \n", + " 5 1.720165 12.341990 \n", + " 6 6.832012 14.963155 \n", + " 7 -0.358602 5.609535 \n", + " 8 -0.677537 2.612370 \n", + " 9 10.344756 10.344756 \n", + "1 0 0.247475 18.712048 \n", + " 1 -0.112673 17.588881 \n", + " 2 -0.764347 14.893510 \n", + " 3 1.016682 14.671037 \n", + " 4 -0.334127 11.258946 \n", "\n", - " Nonpecuniary_Reward_Hammock Wage_Fishing Flow_Utility_Fishing \\\n", - "0 1.5 1.000000 1.048628 \n", - "1 2.5 1.105171 1.814534 \n", - "2 2.5 1.221403 2.943674 \n", - "3 2.5 1.349859 1.096816 \n", - "4 1.5 1.349859 -0.875144 \n", - "5 1.5 1.349859 -0.366008 \n", - "6 1.5 1.349859 1.475731 \n", - "7 2.5 1.491825 -0.099928 \n", - "8 1.5 1.491825 0.164102 \n", - "9 1.5 1.491825 9.234951 \n", - "10 1.5 1.000000 -0.852913 \n", - "11 1.5 1.000000 7.465755 \n", - "12 2.5 1.105171 0.024021 \n", - "13 1.5 1.105171 -0.407064 \n", - "14 2.5 1.221403 3.140511 \n", + " Continuation_Value_Fishing Nonpecuniary_Reward_Hammock \\\n", + "Identifier Period \n", + "0 0 19.436393 1.5 \n", + " 1 18.633215 2.5 \n", + " 2 16.481955 1.5 \n", + " 3 15.291641 2.5 \n", + " 4 12.949235 1.5 \n", + " 5 11.180868 2.5 \n", + " 6 8.559098 1.5 \n", + " 7 6.282250 2.5 \n", + " 8 3.463061 1.5 \n", + " 9 0.000000 1.5 \n", + "1 0 19.436393 1.5 \n", + " 1 18.633215 2.5 \n", + " 2 16.481955 1.5 \n", + " 3 14.373005 1.5 \n", + " 4 12.203234 1.5 \n", "\n", - " Flow_Utility_Hammock Value_Function_Fishing Value_Function_Hammock \\\n", - "0 2.366250 19.630892 19.336065 \n", - "1 2.317108 19.667179 18.563510 \n", - "2 2.974046 19.839016 18.322118 \n", - "3 2.676218 16.868733 16.916543 \n", - "4 0.493640 12.590524 12.611304 \n", - "5 1.080653 10.680733 10.904368 \n", - "6 0.501567 9.867073 7.852518 \n", - "7 2.178192 6.065739 7.419904 \n", - "8 1.257459 3.413636 3.665488 \n", - "9 1.501487 
9.234951 1.501487 \n", - "10 2.921523 17.729351 19.891338 \n", - "11 0.400790 24.238470 15.680288 \n", - "12 3.110792 15.853940 17.503773 \n", - "13 0.844335 13.453224 13.362810 \n", - "14 2.861062 15.765453 14.212800 \n", + " Wage_Hammock Flow_Utility_Hammock Value_Function_Hammock \\\n", + "Identifier Period \n", + "0 0 NaN 2.366250 19.233744 \n", + " 1 NaN 3.921523 20.032537 \n", + " 2 NaN 1.148405 15.352842 \n", + " 3 NaN 1.569578 14.680286 \n", + " 4 NaN 1.079287 12.114100 \n", + " 5 NaN 4.556790 13.991270 \n", + " 6 NaN 1.409027 8.503160 \n", + " 7 NaN 3.943708 9.014635 \n", + " 8 NaN 1.201143 3.663504 \n", + " 9 NaN 0.833252 0.833252 \n", + "1 0 NaN -0.689594 16.177899 \n", + " 1 NaN 3.431514 19.542528 \n", + " 2 NaN 3.303811 17.508248 \n", + " 3 NaN 2.498793 14.820062 \n", + " 4 NaN 2.405523 12.794276 \n", "\n", - " Shock_Reward_Fishing Shock_Reward_Hammock Discount_Rate \n", - "0 2.048628 0.866250 0.95 \n", - "1 2.546696 -0.182892 0.95 \n", - "2 3.228807 0.474046 0.95 \n", - "3 1.553359 0.176218 0.95 \n", - "4 0.092495 -1.006360 0.95 \n", - "5 0.469673 -0.419347 0.95 \n", - "6 1.834067 -0.998433 0.95 \n", - "7 0.603336 -0.321808 0.95 \n", - "8 0.780321 -0.242541 0.95 \n", - "9 6.860693 0.001487 0.95 \n", - "10 0.147087 1.421523 0.95 \n", - "11 8.465755 -1.099210 0.95 \n", - "12 0.926573 0.610792 0.95 \n", - "13 0.536511 -0.655665 0.95 \n", - "14 3.389964 0.361062 0.95 " + " Continuation_Value_Hammock \n", + "Identifier Period \n", + "0 0 17.755256 \n", + " 1 16.958962 \n", + " 2 14.952039 \n", + " 3 13.800745 \n", + " 4 11.615592 \n", + " 5 9.931031 \n", + " 6 7.467509 \n", + " 7 5.337817 \n", + " 8 2.591958 \n", + " 9 0.000000 \n", + "1 0 17.755256 \n", + " 1 16.958962 \n", + " 2 14.952039 \n", + " 3 12.969757 \n", + " 4 10.935529 " ] }, "execution_count": 7, @@ -781,7 +898,7 @@ "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAagAAAEXCAYAAAD4LtBgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAUf0lEQVR4nO3df7BdZ13v8feHhE5/8KNIolOaxFRvQcKvUk4DiEKxAg04VFTGBoShA8Z4WyzcP7TcUQR1RhF/jUMhN5RSuBeoSqsUjS2IQEFoTVLaJmkp5qalPaTaVGqlApa0X//Y65Tt5iRn52Sf7udkv18zZ7LXs5699vfsnORz1rPWfp5UFZIkteYR4y5AkqTZGFCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJi0d1wsvW7asVq9ePa6XlyQ1Yvv27XdX1fLB9rEF1OrVq9m2bdu4Xl6S1IgkX52t3SE+SVKTDChJUpMMKElSkwwoSVKT5gyoJBcnuSvJzgPsT5I/TbI7yY1JTh19mZKkSTPMGdQlwJkH2b8OOLn72gC85/DLkiRNujkDqqquBr5+kC5nAR+snmuA45OcMKoCJUmTaRTXoE4E7ujbnu7aJEmat1F8UDeztM26CmKSDfSGAVm1atVwR3/bY+db1yzHund0x4J2axtlXdBubZPy9wnt1ubP2jyPZ23DGMUZ1DSwsm97BbB3to5Vtbmqpqpqavny75nVQpKkh4wioK4AXtvdzfcc4N6qunMEx5UkTbA5h/iSfAQ4HViWZBr4TeCRAFW1CdgCvBTYDXwTOGehipUkTY45A6qq1s+xv4BzR1aRJEk4k4QkqVEGlCSpSQaUJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSUMFVJIzk9ySZHeSC2bZ/9gkH09yQ5JdSc4ZfamSpEkyZ0AlWQJcCKwD1gDrk6wZ6HYucFNVPQM4HfjDJEeNuFZJ0gQZ5gxqLbC7qvZU1f3ApcBZA30KeHSSAI8Cvg7sH2mlkqSJMkxAnQjc0bc93bX1exfwZGAvsAM4v6oeHDxQkg1JtiXZtm/fvnmWLEmaBMMEVGZpq4HtlwDXA08ATgHeleQx3/Okqs1VNVVVU8uXLz/kYiVJk2OYgJoGVvZtr6B3ptTvHODy6tkN3Ar8yGhKlCRNomECaitwcpKTuhsfzgauGOhzO3AGQJIfAJ4E7BlloZKkybJ0rg5VtT/JecBVwBLg4qralWRjt38T8NvAJUl20BsS/LWqunsB65YkHeHmDCiAqtoCbBlo29T3eC/w4tGWJkmaZM4kIUlqkgElSWqSASVJapIBJUlqkgElSWqSASVJapIBJUlqkgElSWqSASVJapIBJUlqkgElSWqSASVJapIBJUlqkgElSWqSASVJapIBJUlqkgElSWqSASVJapIBJUlqkgElSWqSASVJapIBJUlqkgElSWqSASVJapIBJUlqkgElSWqSASVJapIBJUlqkgElSWqSASVJapIBJUlq0lABleTMJLck2Z3kggP0OT3J9Ul2JfnsaMuUJE2apXN1SLIEuBB4ETANbE1yRVXd1NfneODdwJlVdXuS71+ogiVJk2GYM6i1wO6q2lNV9wOXAmcN9HkVcHlV3Q5QVXeNtkxJ0qQZJqBOBO7o257u2vo9EXhcks8k2Z7ktaMqUJI0meYc4gMyS1vNcpxnAWcAxwBfTHJNVX3lvx0o2QBsAFi1atWhVytJmhjDnEFNAyv7tlcAe2fpc2VV/UdV3Q1cDTxj8EBVtbmqpqpqavny5fOtWZI0AYYJqK3AyUlOSnIUcDZwxUCfjwE/nmR
pkmOBZwM3j7ZUSdIkmXOIr6r2JzkPuApYAlxcVbuSbOz2b6qqm5NcCdwIPAhcVFU7F7JwSdKRbZhrUFTVFmDLQNumge13Au8cXWmSpEnmTBKSpCYZUJKkJg01xDdOq7/94ZEd67aRHUmStNA8g5IkNcmAkiQ1yYCSJDXJgJIkNcmAkiQ1yYCSJDXJgJIkNcmAkiQ1yYCSJDXJgJIkNcmAkiQ1yYCSJDXJgJIkNcmAkiQ1yYCSJDXJgJIkNcmAkiQ1qfkVdXXoRrkKMbgSsaTx8AxKktQkA0qS1CQDSpLUJK9BHYZRXuu5bWRHkqQjg2dQkqQmeQalh5VnnZKGZUBJi4DBrklkQEkdQ+DI4t/n4uc1KElSkwwoSVKThgqoJGcmuSXJ7iQXHKTfaUkeSPJzoytRkjSJ5gyoJEuAC4F1wBpgfZI1B+j3DuCqURcpSZo8w9wksRbYXVV7AJJcCpwF3DTQ743AZcBpI61Qko4w3sAxnGGG+E4E7ujbnu7aHpLkROAVwKbRlSZJmmTDBFRmaauB7T8Bfq2qHjjogZINSbYl2bZv375ha5QkTaBhhvimgZV92yuAvQN9poBLkwAsA16aZH9V/VV/p6raDGwGmJqaGgw5SYuMa49pIQ0TUFuBk5OcBHwNOBt4VX+Hqjpp5nGSS4C/HgwnSZIOxZwBVVX7k5xH7+68JcDFVbUrycZuv9edJEkjN9RUR1W1Bdgy0DZrMFXV6w6/LEnSpHMmCUlSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSk4aazVySNBlGuQjlbYf5fM+gJElNMqAkSU0yoCRJTTKgJElNMqAkSU0yoCRJTTKgJElNMqAkSU0yoCRJTTKgJElNMqAkSU0yoCRJTTKgJElNMqAkSU0yoCRJTTKgJElNMqAkSU0aKqCSnJnkliS7k1wwy/5XJ7mx+/pCkmeMvlRJ0iSZM6CSLAEuBNYBa4D1SdYMdLsVeEFVPR34bWDzqAuVJE2WYc6g1gK7q2pPVd0PXAqc1d+hqr5QVfd0m9cAK0ZbpiRp0gwTUCcCd/RtT3dtB/J64G8PpyhJkpYO0SeztNWsHZMX0guoHzvA/g3ABoBVq1YNWaIkaRINcwY1Dazs214B7B3slOTpwEXAWVX1r7MdqKo2V9VUVU0tX758PvVKkibEMAG1FTg5yUlJjgLOBq7o75BkFXA58Jqq+sroy5QkTZo5h/iqan+S84CrgCXAxVW1K8nGbv8m4K3A44F3JwHYX1VTC1e2JOlIN8w1KKpqC7BloG1T3+M3AG8YbWmSpEnmTBKSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJg0VUEnOTHJLkt1JLphlf5L8abf/xiSnjr5USdIkmTOgkiwBLgTWAWuA9UnWDHRbB5zcfW0A3jPiOiVJE2aYM6i1wO6q2lNV9wOXAmcN9DkL+GD1XAMcn+SEEdcqSZogwwTUicAdfdvTXduh9pEkaWipqoN3SF4JvKSq3tBtvwZYW1Vv7OvzN8DvVtXnu+1PAb9aVdsHjrWB3hAgwJOAW0b1jQDLgLtHeLxRabUusLb5arW2VusCa5uPVuuC0df2g1W1fLBx6RBPnAZW9m2vAPbOow9VtRnYPMRrHrIk26pqaiGOfTharQusbb5ara3VusDa5qPVuuDhq22YIb6twMlJTkpyFHA2cMVAnyuA13Z38z0HuLeq7hxxrZKkCTLnGVRV7U9yHnAVsAS4uKp2JdnY7d8EbAFeCuwGvgmcs3AlS5ImwTBDfFT
VFnoh1N+2qe9xAeeOtrRDtiBDhyPQal1gbfPVam2t1gXWNh+t1gUPU21z3iQhSdI4ONWRJKlJiz6g5pqGaVySXJzkriQ7x13LoCQrk3w6yc1JdiU5f9w1ASQ5Osk/Jrmhq+vt465pUJIlSb6U5K/HXUu/JLcl2ZHk+iTbxl1PvyTHJ/loki93P3PPbaCmJ3Xv1czXvyd507jrmpHkzd2/gZ1JPpLk6HHXNCPJ+V1duxb6PVvUQ3zdNExfAV5E71b3rcD6qrpprIUBSZ4P3Edvho2njrueft0sHydU1XVJHg1sB3563O9bkgDHVdV9SR4JfB44v5udpAlJ/hcwBTymqn5q3PXMSHIbMFVVzX1uJskHgM9V1UXdncDHVtW/jbuuGd3/I18Dnl1VX22gnhPp/eyvqapvJflzYEtVXTLeyiDJU+nNJrQWuB+4EvjlqvqnhXi9xX4GNcw0TGNRVVcDXx93HbOpqjur6rru8TeAm2lg5o9uqqz7us1Hdl/N/AaVZAXwMuCicdeyWCR5DPB84H0AVXV/S+HUOQP4/y2EU5+lwDFJlgLHMsvnSsfkycA1VfXNqtoPfBZ4xUK92GIPKKdYOkxJVgPPBK4dbyU93RDa9cBdwCerqom6On8C/Crw4LgLmUUBn0iyvZuxpRU/BOwD3t8NjV6U5LhxFzXgbOAj4y5iRlV9DfgD4HbgTnqfK/3EeKt6yE7g+Uken+RYeh8vWjnHc+ZtsQdUZmlr5jfu1iV5FHAZ8Kaq+vdx1wNQVQ9U1Sn0ZiNZ2w0pjF2SnwLuGpy+qyHPq6pT6a0scG43xNyCpcCpwHuq6pnAfwAtXSs+Cng58BfjrmVGksfRGwk6CXgCcFySXxhvVT1VdTPwDuCT9Ib3bgD2L9TrLfaAGmqKJX2v7hrPZcCHqurycdczqBsG+gxw5phLmfE84OXdtZ5LgZ9I8v/GW9J3VdXe7s+7gL+kN/zdgmlguu9M+KP0AqsV64Drqupfxl1In58Ebq2qfVX1HeBy4EfHXNNDqup9VXVqVT2f3mWMBbn+BIs/oIaZhkkDupsR3gfcXFV/NO56ZiRZnuT47vEx9P6hfnm8VfVU1VuqakVVrab3c/b3VdXEb7VJjutudqEbPnsxvaGYsauqfwbuSPKkrukMYOw3MfVZT0PDe53bgeckObb7t3oGvevETUjy/d2fq4CfYQHfv6FmkmjVgaZhGnNZACT5CHA6sCzJNPCbVfW+8Vb1kOcBrwF2dNd7AP53N2PIOJ0AfKC7q+oRwJ9XVVO3czfqB4C/7P1fxlLgw1V15XhL+m/eCHyo+yVyD41MhdZdQ3kR8EvjrqVfVV2b5KPAdfSGz75EW7NKXJbk8cB3gHOr6p6FeqFFfZu5JOnItdiH+CRJRygDSpLUJANKktQkA0qS1CQDSpLUJANKOkxJHuhmxN6Z5C+625cP5fkXJVlzCP1fl+Rdh16ptLgYUNLh+1ZVndLNWn8/sHHYJyZZUlVvGPdM8lKLDChptD4H/A+AJL/QrW91fZL/030AmST3JfmtJNcCz03ymSRT3b713bpOO5O8Y+agSc5J8pUkn6X3QWvpiGdASSPSLY2wjt4MHU8Gfp7eJK6nAA8Ar+66HgfsrKpnV9Xn+57/BHoTcf4EcApwWpKf7tbveju9YHoRMPRwoLSYLeqpjqRGHNM3ZdTn6M1zuAF4FrC1m4LoGHpLiEAvrC6b5TinAZ+pqn0AST5Eby0lBtr/DHjiAnwfUlMMKOnwfas7S3pIN8nnB6rqLbP0/3ZVPTBL+2zLx8xwTjJNHIf4pIXxKeDn+mZ+/r4kPzjHc64FXpBkWXe9aj29FUuvBU7vFol7JPDKhSxcaoVnUNICqKqbkvw6vVVuH0E38zNwwGXFq+rOJG8BPk3vbGpLVX0MIMnbgC/SW2H1Onqz90tHNGczlyQ1ySE+SVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkww
oLUpJfiXJzUnuSXLBQfodcPXZJFuSHL9wVbYtyeokO8ddxzCS3JZk2bjr0MPLufh02FZf8DcjnS/rtt972cFm9Z7xP4F1VXXrfF+nql463+eO3NseO9o5x9527zDvodQ0z6C06CTZBPwQcEWSN8+cISV5ZbcS7Q1Jru57yhOSXJnkn5L8ft9xbutmDl/dnY29N8muJJ9IckzX57QkNyb5YpJ3LpYzjkOwZPD7TvKLSbZ27+NlSY4FSHJJkvck+XSSPUlekOTi7r27ZOaA3YrB70iyPcnfJVnbrRq8J8nLuz5HJ3l/t3rwl5K8sGtfkuQPuvYbk7yxv9iuviuT/OLD+B5pTAwoLTpVtRHYC7wQuKdv11uBl1TVM4CX97WfQm9126cBP59k5SyHPRm4sKqeAvwb8LNd+/uBjVX1XHoLDR5pZvu+L6+q07r38Wbg9X39H0dvxd83Ax8H/hh4CvC0JDNrYh1Hb4HFZwHfAH6H3krArwB+q+tzLkBVPY3esiIfSHI0vYUeTwKeWVVPBz7U99qP6l7zw1X13tG9BWqVAaUjyT8Al3S/XfcvR/Gpqrq3qr4N3ATMti7TrVU1syrudmB1d33q0VX1ha79wwtV+Bh9z/cNPDXJ55LsoLdM/VP6+n+8eksg7AD+pap2VNWDwK7uuQD3A1d2j3cAn62q73SPZ/r8GPB/Aarqy/SWIXki8JPApqra3+37et9rfwx4f1V9cATftxYBA0pHjO7M6teBlcD1SR7f7frPvm4PMPu119n6TMJ1nNm+70uA87qzm7cDR8/S/8GB5z7Id9/X79R31/F5qF8XZDN9DvTehgOvHvwPwLputWJNAANKR4wkP1xV11bVW4G76QXVvFXVPcA3kjynazr7cGtcJB4N3Nmt3vvqBXqNq2eOneSJwCrgFuATwMYkS7t939f3nLcC/wq8e4FqUmMMKB1J3tldXN9J7z/AG0ZwzNcDm5N8kd5v9/eO4Jit+w16y8x/EvjyAr3Gu+ndoLED+DPgdVX1n8BFwO3AjUluAF418Lw3AUf33+yiI5cr6koHkeRRVXVf9/gC4ISqOn/MZUkTwc9BSQf3siRvofdv5avA68ZbjjQ5PIOSJDXJa1CSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCb9FxEmlm7HrTS9AAAAAElFTkSuQmCC\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAaQAAAERCAYAAAA0S9PzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAXeklEQVR4nO3debycV33f8c/XlvEiWcaLEDEgxGJTMI2dohQKddiXFAhO3VBiYQwNcV52SAuBUpIXrteGpSwNYJy4wTZgMEsxYGwgLMaJnVBAhJpWISgYJONFtrwgS8ILVn7943kE4+ure+daczVHo8/79ZqX7jxz5pnfnbma7zxnznNOqgpJksZtj3EXIEkSGEiSpEYYSJKkJhhIkqQmGEiSpCYsGHcBAIccckgtX7583GVIkubZt7/97Vuqasl0tzURSMuXL2fVqlXjLkOSNM+SrNvebXbZSZKaYCBJkppgIEmSmmAgSZKaYCBJkpowVCAleU2SVUnuTnLBLG1fl2R9ko1Jzkuy90gqlSRNtGGPkG4AzgLOm6lRkucDbwKeDSwHHg2cvgP1SZJ2E0MFUlVdXFWfAW6dpekJwAeqanVV3Q6cCbxyx0qUJO0ORn1i7BHAZweuXw0sTXJwVd0nzJKcCJwIsGzZsuH2ftoBo6kS4LSNI9yXdc1tX9Y1t31Z19z2ZV1z21c7dY16UMMiYLCibT/vP7VhVZ1bVSuqasWSJdPOIiFJ2o2MOpA2A4sHrm/7edOIH0eSNGFGHUirgSMHrh8J3DS1u06SpKmGHfa9IMk+wJ7Ankn2STLd908fAn4nyROSHAi8GbhgZNVKkibWsEdIbwbupBvS/fL+5zcnWZZkc5JlAFX1ReDtwNeAdf3l1JFXLUmaOEONsquq04DTtnPzoilt3wW8a4eqkiTtdpw6SJLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1IShAinJQUk+nWRLknVJjttOuyQ5K8n1STYmuSLJEaMtWZI0iYY9QjobuAdYCqwEztlO0PwW8B+Ao4GDgK8DHx5BnZKkCTdrICVZCBwLnFJVm6vqKuAS4Phpmj8KuKqqflhVW4ELgSeMsmBJ0mQa5gjpcGBrVa0Z2HY1MN0R0seAxyY5PMlewAnAF6fbaZITk6xKsmrDhg1zrVuSNGEWDNFmEbBxyraNwP7TtL0RuBL4PrAV+DHwrOl2WlXnAucCrFixooasV5I0oYY5QtoMLJ6ybTGwaZq2pwK/CjwC2Ac4Hbg8yX47UqQkafINE0hrgAVJDhvYdiSwepq2RwIfr6rrqureqroAOBC/R5IkzWLWQKqqLcDFwBlJFiZ5GvASph899y3gt5IsTbJHkuOBvYAfjLJoSdLkGeY7JICTgfOAm4FbgZOqanWSZcDfA0+oqmuBtwEPAf4PsJAuiI6tqp+MvHJJ0kQZKpCq6jbgmGm2X0s36GHb9buA3+8vkiQNzamDJElNMJAkSU0wkCRJTTCQJElNMJAkSU0wkCRJTTCQJElNMJAkSU0wkCRJTTCQJElNMJAkSU0wkCRJTTCQJElNMJAkSU0wkCRJTTCQJElNMJAkSU0wkCRJTTCQJElNMJAkSU0wkCRJTTCQJElNMJAkSU0wkCRJTTCQJElNMJAkSU0wkCRJTTCQJElNMJAkSU0YKpCSHJTk00m2JFmX5LgZ2j46yaVJNiW5JcnbR1euJGlSDXuEdDZwD7AUWAmck+SIqY2SPAj4MnA58FDg4cCFoylVkjTJZg2kJAuBY4FTqmpzVV0FXAIcP03zVwI3VNW7qmpLVd1VVd8dacWSpIk0zBHS4cD
WqlozsO1q4H5HSMBTgLVJvtB3112R5J+PolBJ0mQbJpAWARunbNsI7D9N24cDLwPeAxwKXAZ8tu/Ku48kJyZZlWTVhg0b5la1JGniDBNIm4HFU7YtBjZN0/ZO4Kqq+kJV3QO8AzgYePzUhlV1blWtqKoVS5YsmWPZkqRJM0wgrQEWJDlsYNuRwOpp2n4XqFEUJknavcwaSFW1BbgYOCPJwiRPA14CfHia5hcCT0nynCR7Aq8FbgG+N8KaJUkTaNhh3ycD+wI3AxcBJ1XV6iTLkmxOsgygqr4PvBz4M+B2uuD6jb77TpKk7VowTKOqug04Zprt19INehjcdjHdEZUkSUNz6iBJUhMMJElSEwwkSVITDCRJUhOGGtTQiuV3fXRk+1o7sj1JkkbBIyRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITdqn1kDQZXNdK0nQMpAnmG7+kXYlddpKkJhhIkqQm2GUnNc6uV+0uDCSp5xu/NF522UmSmmAgSZKaYJedpAfELk6NmkdIkqQmDHWElOQg4APA84BbgD+qqhk/HiW5HHgmsFdV3bujhbbMT4qSZuP7xOyG7bI7G7gHWAocBVyW5OqqWj1d4yQr57BvSRoZ3/h3XbN22SVZCBwLnFJVm6vqKuAS4PjttD8AOBV44ygLlSRNtmG+Qzoc2FpVawa2XQ0csZ32fwKcA6yfaadJTkyyKsmqDRs2DFWsJGlyDRNIi4CNU7ZtBPaf2jDJCuBpwHtn22lVnVtVK6pqxZIlS4apVZI0wYYJpM3A4inbFgObBjck2QN4P/CfJn0QgyRp9IYJpDXAgiSHDWw7Epg6oGExsAL4eJL1wLf67dclOXqHK5UkTbRZR8JV1ZYkFwNnJHk13Si7lwBPndJ0I3DowPVHAN8EngT4JZEkaUbDnhh7MrAvcDNwEXBSVa1OsizJ5iTLqrN+24VfhNBNVXXPPNQuSZogQ50rVFW3AcdMs/1aukEP091nLZAdKU6StPtw6iBJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSE4ZafkKSNJmW3/XRke1r7Q7e3yMkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSE4YKpCQHJfl0ki1J1iU5bjvtTkjy7SR3JLkuyduTuOaSJGlWwx4hnQ3cAywFVgLnJDlimnb7Aa8FDgGeDDwbeMMI6pQkTbhZj16SLASOBZ5YVZuBq5JcAhwPvGmwbVWdM3D1+iQfAZ45wnolSRNqmCOkw4GtVbVmYNvVwHRHSFP9GrB6uhuSnJhkVZJVGzZsGGJXkqRJNkwgLQI2Ttm2Edh/pjsleRWwAnjHdLdX1blVtaKqVixZsmSYWiVJE2yYAQebgcVTti0GNm3vDkmOAd4KPKeqbnng5UmSdhfDHCGtARYkOWxg25FsvyvuBcD/BF5cVf93x0uUJO0OZg2kqtoCXAyckWRhkqcBLwE+PLVtkmcBHwGOrapvjrpYSdLkGnbY98nAvsDNwEXASVW1OsmyJJuTLOvbnQIcAHy+3745yRdGX7YkadIMddJqVd0GHDPN9mvpBj1su+4Qb0nSA+LUQZKkJhhIkqQmGEiSpCYYSJKkJhhIkqQmGEiSpCYYSJKkJhhIkqQmGEiSpCYYSJKkJhhIkqQmGEiSpCYYSJKkJhhIkqQmGEiSpCYYSJKkJhhIkqQmGEiSpCYYSJKkJhhIkqQmGEiSpCYYSJKkJhhIkqQmGEiSpCYYSJKkJhhIkqQmGEiSpCYYSJKkJhhIkqQmDBVISQ5K8ukkW5KsS3LcDG1fl2R9ko1Jzkuy9+jKlSRNqmGPkM4
G7gGWAiuBc5IcMbVRkucDbwKeDSwHHg2cPpJKJUkTbdZASrIQOBY4pao2V9VVwCXA8dM0PwH4QFWtrqrbgTOBV46wXknShBrmCOlwYGtVrRnYdjVwvyOkftvVU9otTXLwAy9RkrQ7SFXN3CA5GvhkVT10YNvvAiur6hlT2l4D/H5VfbG/vhddV9+jqmrtlLYnAif2Vx8HfH+HfpNfOAS4ZUT7GiXrmhvrmhvrmhvrmptR1vXIqloy3Q0LhrjzZmDxlG2LgU1DtN328/3aVtW5wLlDPP6cJFlVVStGvd8dZV1zY11zY11zY11zs7PqGqbLbg2wIMlhA9uOBFZP03Z1f9tgu5uq6tYHXqIkaXcwayBV1RbgYuCMJAuTPA14CfDhaZp/CPidJE9IciDwZuCCEdYrSZpQww77PhnYF7gZuAg4qapWJ1mWZHOSZQD9d0dvB74GrOsvp46+7BmNvBtwRKxrbqxrbqxrbqxrbnZKXbMOapAkaWdw6iBJUhMMJElSEyYmkOYy397OlOQ1SVYluTvJBeOuByDJ3kk+0D9Pm5J8J8mvj7sugCQXJrkxyR1J1iR59bhrGpTksCR3Jblw3LUAJLmir2dzfxnV+Xw7LMnLknyv/z95TX9O4zjr2TzlsjXJe8dZ0zZJlif5fJLb+7lA35dkmNNy5ruuxye5vJ+b9AdJfnM+H29iAokh59sbgxuAs4Dzxl3IgAXAj4GnAwcApwCfSLJ8jDVt8xZgeVUtBn4DOCvJk8Zc06CzgW+Nu4gpXlNVi/rL48ZdDECS5wJvA14F7A/8GvDDcdY08BwtonufuBP45DhrGvB+ukFjvwQcRfd/8+RxFtQH4meBS4GD6CYyuDDJ4fP1mBMRSHOcb2+nqqqLq+ozQDPnYlXVlqo6rarWVtU/VdWlwI+Asb/x9/Mg3r3tan95zBhL+rkkLwN+Anx13LXsAk4Hzqiq/93/jV1fVdePu6gB/44uAK4cdyG9RwGfqKq7qmo98EWmn55tZ/pnwKHAu6tqa1VdDvwN8/i+OhGBxNzm29MUSZbSPYfTney80yV5f5KfAv8A3Ah8fswlkWQxcAbw+nHXMo23JLklyd8keca4i0myJ7ACWNJ381zXd0HtO+7aBpwAfKjaGWb8p8DLkuyX5GHAr9OF0jhlO9ueOF8POCmBtAjYOGXbRrquAs2gn2/wI8AHq+ofxl0PQFWdTPfaHU13UvbdM99jpziTbib7H4+7kCn+C90yLw+jO1fkc0nGfUS5FNiL7ijkaLouqF+hO1F+7PrzJp8OfHDctQz4K7oP0HcA1wGrgM+MtaLuA+HNwH9OsleS59E9b/vN1wNOSiDNZb499ZLsQTfjxj3Aa8Zczn30XQRXAQ8HThpnLUmOAp4DvHucdUynqr5RVZuq6u6q+iBdl8q/GXNZd/b/vreqbqyqW4B3Mf66tnkFcFVV/WjchcDP/x/+Jd2Hr4V0E5keSPcd3NhU1c+AY4AXAuvpegc+QReY82JSAmku8+0JSBLgA3SfZo/t//hatIDxf4f0DLoFJ69Nsh54A3Bskr8bZ1HbUUzf1bLzCujWQruur6VFr6Cto6ODgEcA7+s/WNwKnE8DAV5V362qp1fVwVX1fLqj8W/O1+NNRCDNcb69nSrJgiT7AHsCeybZp4XhnMA5wOOBF1fVnbM13hmSPKQfKrwoyZ79CsS/DVw+5tLOpQvFo/rLnwGXAc8fZ1FJHpzk+dv+ppKspBvN9pfjrKt3PvAH/Wt6IPBautFaY5XkqXTdm62MrqM/gvwRcFL/Oj6Y7juuq2e+5/xL8sv939d+Sd5ANwrwgvl6vIkIpN608+2NtySg6ze/k25p95f3P4+1Lz3JI4Hfo3tzXT9wXsbKcdZF94n6JLpP17cD7wBeW1WfHWtRVT+tqvXbLnRdxHdV1YZx1kX3Pc1ZwAa6tWr+ADimqlo4F+lMuuHxa4DvAd8B/ttYK+qcAFxcVa115/9b4AV0r+UPgHuB142
1os7xdAOLbgaeDTx3YBTsyDmXnSSpCZN0hCRJ2oUZSJKkJhhIkqQmGEiSpCYYSJKkJhhIkqQmGEjSmCRZmeRLD/C+p7WyJpM0KgaSNEdJ1ia5sz+Z+KYk5ydZNNf9VNVHqup581GjtCsykKQH5sX9Qm//AvhV5jj7RiPTR0lNMZCkHdAvOvcF4IlJDuiXhr8xyfVJzurXBiLJK/v1it6d5DbgtH7bVdv2leSpSb7VLxf9rX7etW23PSrJX6Vbcv7LdDNCSxPFQJJ2QJJH0M3K/B26GaTvBR5Lt/7P84BXDzR/Mt0y3g9hyrxuSQ6im7D1PcDBdMs1XJbk4L7JR4Fv0wXRmXRzskkTxW4D6YH5TJJ76RaCvAz4C7qJRB/cz56+Jcm7gROBP+/vc0NVvbf/+d5uBZCfeyHwj1W1bYb6i5L8R+DFSS6n6xZ8Tj+x5V8n+dx8/nLSOBhI0gNzTFV9ZduVJP+SbvbtGweCZg9gcIXZmVabPRRYN2XbOrqlEg4Fbu+XWRm87REPrHSpTQaSNBo/pltq/ZCqunc7bWaaWv8G4JFTti0Dvkg3/f+BSRYOhNKyWfYn7XL8Dkkagaq6EfgS8M4ki5PskeQxSZ4+5C4+Dxye5Lh+kbZ/DzwBuLSq1gGrgNOTPCjJvwZePC+/iDRGBpI0Oq8AHgT8Pd0Cg/+LboXNWfXLVr8IeD1wK/BG4EX9aqIAx9ENirgNOBX40EgrlxrgAn1qRpLHAR+jG6W2EDi1qs6c5T5rgVcPfp8zcNvRwF9U1ePmodyJNNPzuatJchrw2Kp6+bhr0XD8Dmk3t/xNl83rJ5K1b31hZm/1c28ErqiqXxnFY1fVlcCuEUanHTC/nwxP2ziX10EaC7vs1JJHAqvHXYSk8TCQ1IT+XJtnAu/r54j7aJKz+tsOSXJpkp8kuS3JlUkG/3aPSvLdfoaDjyfZp7/fM5JcN/AYa5O8Ybq2/e1v7GdZuCHJq5NUksfupKegJfd7PpMc2L8GG5Lc3v/88G13SHJFPzPF3/av3+eSHJzkI0nu6GeeWD7QvpKcnOQf+9knzuwHgXy9b/+JJA8aaP+7SX7Qv/6XJDl04LYjkny5v+2mJH889RdKsleSi5J8anC/aouBpCZU1bOAK4HX9HPE3TNw8+uB64AlwFLgj7nvkOeXAi8AHgX8MvDKGR5q2rZJXgD8IfAcuu+whh0dN4mme472AM6nO4pdBtwJvG/K/V4GHE937tRjgK/39zkI+B7dYIxBLwCeBDyFrrv2XGAl3flVTwR+GyDJs4C39HX9Et05WB/rb9sf+Ard8PhD6V67rw4+SJJ9gc/QDct/aVUN/m2pIQaSdgU/o3sjemRV/ayqrqz7jsZ5T1XdUFW3AZ8DjpphX9tr+1Lg/KpaXVU/BU6fh99jV3G/56iqbq2qT1XVT6tqE93UR1ND+/yquqaqNtLN73dNVX2lPy/rk3TTKQ16W1XdUVWrgf8HfKmqfjhw/23tVwLnVdXf9TNV/BHwr/ojrhcB66vqnVV1V1VtqqpvDDzGYrqwugZ4VVVtHcUTpPlhIGlX8N+BHwBfSvLDJG+acvv6gZ9/Csy0FMT22h7K8LMqTLr7PUdJ9kvy50nWJbkD+Gvgweknj+3dNPDzndNcn/q6DNv+PrNYVNVmuqHxD6M7mrpmht/lKXRHeW+d8iFGDTKQ1Lz+U+/rq+rRdCeE/mGSZ4/4YW4EHj5w3Wl57uv1dCMWn1xVi4Ff67fvjNF795nFIslCuglor6f74PCYGe77Jbruvq8mWTqfRWrHGUhqXpIXJXlsukni7gC29pdR+gTwqiSPT7If8F9HvP9d3f50Ry0/6Wcmn/p90Hz6KN1rc1SSvYE/Ab5RVWuBS4GHJnltkr2T7J/kyYN3rqq39/v4ahKX7WiY5yHt5uZ4ntC4HEb3BfoSuhkQ3l9VV4zyAarqC0neA3wN+Ce6JR6Op/sifP61f57Q/6B7U7+F7ojlncAxO+OBq+qrSU4
BPgUcCPwt3QAKqmpTkucCf0oXknf3tX5jyj7O7MPsK0me1X8/psY4U4M0jSSPp/uife8ZJkuVNEJ22Um9JL/ZT156IPA24HOGkbTzGEjSL/wesIFu1NZW4KTxliPtXuyykyQ1wSMkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVIT/j9Mu6xbRKh2hQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] @@ -822,7 +939,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 9, @@ -831,7 +948,7 @@ }, { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -896,7 +1013,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -1167,7 +1284,7 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEXCAYAAAD4LtBgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAV+ElEQVR4nO3dfbRddX3n8ffHBBcPPmAh7UJImnQGrVQUMaCOLWIpStSBOlNXQasjo80wA4rOHy3O6vhQZ63R2lUdl2iaIoJTHmqF1qgZwLEqPsEkPCYhQjNAIcIUqJSCYjHwnT/OvvF4uMk9uTk353dz3q+17srZe//OPt+7c5LP2b+9z++XqkKSpNY8ZdwFSJI0HQNKktQkA0qS1CQDSpLUJANKktQkA0qS1KSF43rhgw8+uJYuXTqul5ckNeK66657oKoWDa4fW0AtXbqU9evXj+vlJUmNSPJ30623i0+S1CQDSpLUJANKktQkA0qS1KQZAyrJ+UnuS7JxB9uT5ONJtiS5OcnRoy9TkjRphjmDugA4aSfbVwCHdz8rgU/tflmSpEk3Y0BV1dXAD3bS5BTgs9VzDXBgkkNGVaAkaTKN4hrUocDdfctbu3WSJM3aKL6om2nWTTsLYpKV9LoBWbJkyXB7f/8zZ1vXNPt6aHT7gnZrG2Vd0G5tk/L3Ce3W5nttlvuztmGM4gxqK7C4b/kw4J7pGlbV6qpaXlXLFy160qgWkiRtN4qAWgO8pbub76XAQ1V17wj2K0maYDN28SW5BDgeODjJVuB9wD4AVbUKWAu8BtgC/Ag4fa6KlSRNjhkDqqpOm2F7AWeOrCJJknAkCUlSowwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKThgqoJCcluTXJliTnTLP9mUm+mOSmJJuSnD76UiVJk2ThTA2SLADOBU4EtgLrkqypqlv6mp0J3FJV/zrJIuDWJBdV1WNzUrV26shlS0a6vw0j3ZskDWeYM6hjgS1VdXsXOJcCpwy0KeDpSQI8DfgBsG2klUqSJsowAXUocHff8tZuXb9PAM8D7qH3gfvsqnpicEdJViZZn2T9/fffP8uSJUmTYJiAyjTramD51cCNwLOBo4BPJHnGk55UtbqqllfV8kWLFu1ysZKkyTFMQG0FFvctH0bvTKnf6cDl1bMFuAP45dGUKEmaRMME1Drg8CTLkjwVOBVYM9DmLuAEgCS/ADwXuH2UhUqSJsuMd/FV1bYkZwFXAguA86tqU5Izuu2rgA8CFyTZQK9L8Per6oE5rFvz1CjvMPTuQmnvNmNAAVTVWmDtwLpVfY/vAV412tIkSZPMkSQkSU0yoCRJTTKgJElNMqAkSU0yoCRJTTKgJElNMqAkSU0yoCRJTTKgJElNMqAkSU0yoCRJTTKgJElNMqAkSU0yoCRJTTKgJElNGmo+qHFa+uOLR7avO0e2J0nSXPMMSpLUJANKktQkA0qS1CQDSpLUJANKktSk5u/ik/aUI5ctGdm+NoxsT9Lk8gxKktQkA0qS1CQDSpLUJANKktQkA0qS1CQDSpLUJANKktQkA0qS1CQDSpLUpKFGkkhyEvA/gAXAeVX1oWnaHA98DNgHeKCqXjHCOpvkXFWSNHdmDKgkC4BzgROBrcC6JGuq6pa+NgcCnwROqqq7kvz8XBUsTSKHYdIkGqaL71hgS1XdXlWPAZcCpwy0eSNweVXdBVBV9422TEnSpBkmoA4F7u5b3tqt6/cc4FlJv
p7kuiRvGVWBkqTJNMw1qEyzrqbZz4uBE4D9gO8muaaqbvuZHSUrgZUAS5aMrstCkrT3GeYMaiuwuG/5MOCeadpcUVU/rKoHgKuBFw7uqKpWV9Xyqlq+aNGi2dYsSZoAwwTUOuDwJMuSPBU4FVgz0OYLwK8lWZhkf+AlwObRlipJmiQzdvFV1bYkZwFX0rvN/Pyq2pTkjG77qqranOQK4GbgCXq3om+cy8KlUXt485O+PaF5zDsf57+hvgdVVWuBtQPrVg0sfwT4yOhKk6TZ8wPH/OeU75JmbZRnKeCZin6WASXNA54NaBI5Fp8kqUkGlCSpSXbxaY+yq0rSsAwoSbPmBw7NJbv4JElNMqAkSU0yoCRJTTKgJElN8iYJSdrDlv744pHt686R7ak9nkFJkppkQEmSmmRASZKaZEBJkppkQEmSmmRASZKaZEBJkppkQEmSmmRASZKaZEBJkppkQEmSmmRASZKaZEBJkppkQEmSmmRASZKaZEBJkppkQEmSmmRASZKa5JTvkqTtjly2ZGT72rCbzzegJEnbPbz5Q+MuYbuhuviSnJTk1iRbkpyzk3bHJHk8yW+NrkRJ0iSaMaCSLADOBVYARwCnJTliB+0+DFw56iIlSZNnmDOoY4EtVXV7VT0GXAqcMk27dwCXAfeNsD5J0oQaJqAOBe7uW97ardsuyaHA64FVoytNkjTJhgmoTLOuBpY/Bvx+VT2+0x0lK5OsT7L+/vvvH7ZGSdIEGuYuvq3A4r7lw4B7BtosBy5NAnAw8Jok26rqr/sbVdVqYDXA8uXLB0NOkqTthgmodcDhSZYB3wdOBd7Y36Cqlk09TnIB8KXBcJIkaVfMGFBVtS3JWfTuzlsAnF9Vm5Kc0W33upMkaeSG+qJuVa0F1g6smzaYquqtu1+WJGnSORafJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSQaUJKlJBpQkqUkGlCSpSUPNB6X55eHNHxp3CZK02zyDkiQ1yYCSJDXJgJIkNcmAkiQ1yYCSJDXJgJIkNcmAkiQ1yYCSJDXJgJIkNcmAkiQ1yYCSJDXJgJIkNcmAkiQ1yYCSJDXJgJIkNWmogEpyUpJbk2xJcs4029+U5Obu5ztJXjj6UiVJk2TGgEqyADgXWAEcAZyW5IiBZncAr6iqFwAfBFaPulBJ0mQZ5gzqWGBLVd1eVY8BlwKn9Deoqu9U1YPd4jXAYaMtU5I0aYYJqEOBu/uWt3brduRtwP/anaIkSVo4RJtMs66mbZi8kl5A/eoOtq8EVgIsWbJkyBIlSZNomDOorcDivuXDgHsGGyV5AXAecEpV/cN0O6qq1VW1vKqWL1q0aDb1SpImxDABtQ44PMmyJE8FTgXW9DdIsgS4HHhzVd02+jIlSZNmxi6+qtqW5CzgSmABcH5VbUpyRrd9FfBe4CDgk0kAtlXV8rkrW5K0txvmGhRVtRZYO7BuVd/jtwNvH21pkqRJ5kgSkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYZUJKkJhlQkqQmGVCSpCYNFVBJTkpya5ItSc6ZZnuSfLzbfnOSo0dfqiRpkswYUEkWAOcCK4AjgNOSHDHQbAVwePezEvjUiOuUJE2YYc6gjgW2VNXtVfUYcClwykCbU4DPVs81wIFJDhlxrZKkCTJMQB0K3N23vLVbt6ttJEkaWqpq5w2SNwCvrqq3d8tvBo6tqnf0tfky8N+r6lvd8leB36uq6wb2tZJeFyDAc4FbR/WLAAcDD4xwf6PSal1gbbPVam2t1gXWNhut1gWjr+0Xq2rR4
MqFQzxxK7C4b/kw4J5ZtKGqVgOrh3jNXZZkfVUtn4t9745W6wJrm61Wa2u1LrC22Wi1LthztQ3TxbcOODzJsiRPBU4F1gy0WQO8pbub76XAQ1V174hrlSRNkBnPoKpqW5KzgCuBBcD5VbUpyRnd9lXAWuA1wBbgR8Dpc1eyJGkSDNPFR1WtpRdC/etW9T0u4MzRlrbL5qTrcARarQusbbZara3VusDaZqPVumAP1TbjTRKSJI2DQx1Jkpo07wNqpmGYxiXJ+UnuS7Jx3LUMSrI4ydeSbE6yKcnZ464JIMm+Sf5Pkpu6uj4w7poGJVmQ5IYkXxp3Lf2S3JlkQ5Ibk6wfdz39khyY5PNJvte9517WQE3P7Y7V1M8/JXnXuOuakuTd3b+BjUkuSbLvuGuakuTsrq5Nc33M5nUXXzcM023AifRudV8HnFZVt4y1MCDJccAj9EbYeP646+nXjfJxSFVdn+TpwHXAb477uCUJcEBVPZJkH+BbwNnd6CRNSPKfgeXAM6rqdeOuZ0qSO4HlVdXc92aSXAh8s6rO6+4E3r+q/nHcdU3p/h/5PvCSqvq7Buo5lN57/4iqejTJ54C1VXXBeCuDJM+nN5rQscBjwBXAf6yqv52L15vvZ1DDDMM0FlV1NfCDcdcxnaq6t6qu7x4/DGymgZE/uqGyHukW9+l+mvkEleQw4LXAeeOuZb5I8gzgOODTAFX1WEvh1DkB+L8thFOfhcB+SRYC+zPN90rH5HnANVX1o6raBnwDeP1cvdh8DyiHWNpNSZYCLwKuHW8lPV0X2o3AfcBXqqqJujofA34PeGLchUyjgKuSXNeN2NKKXwLuBz7TdY2el+SAcRc14FTgknEXMaWqvg/8MXAXcC+975VeNd6qttsIHJfkoCT70/t60eIZnjNr8z2gMs26Zj5xty7J04DLgHdV1T+Nux6Aqnq8qo6iNxrJsV2XwtgleR1w3+DwXQ15eVUdTW9mgTO7LuYWLASOBj5VVS8Cfgi0dK34qcDJwF+Ou5YpSZ5FrydoGfBs4IAkvzPeqnqqajPwYeAr9Lr3bgK2zdXrzfeAGmqIJT1Zd43nMuCiqrp83PUM6rqBvg6cNOZSprwcOLm71nMp8OtJ/ny8Jf1UVd3T/Xkf8Ff0ur9bsBXY2ncm/Hl6gdWKFcD1VfX34y6kz28Ad1TV/VX1E+By4F+NuabtqurTVXV0VR1H7zLGnFx/gvkfUMMMw6QB3c0InwY2V9WfjLueKUkWJTmwe7wfvX+o3xtvVT1V9Z6qOqyqltJ7n/1NVTXxqTbJAd3NLnTdZ6+i1xUzdlX1/4C7kzy3W3UCMPabmPqcRkPde527gJcm2b/7t3oCvevETUjy892fS4B/wxwev6FGkmjVjoZhGnNZACS5BDgeODjJVuB9VfXp8Va13cuBNwMbuus9AP+lGzFknA4BLuzuqnoK8Lmqaup27kb9AvBXvf/LWAhcXFVXjLekn/EO4KLuQ+TtNDIUWncN5UTgP4y7ln5VdW2SzwPX0+s+u4G2RpW4LMlBwE+AM6vqwbl6oXl9m7kkae8137v4JEl7KQNKktQkA0qS1CQDSpLUJANKktQkA0raTUke70bE3pjkL7vbl3fl+eclOWIX2r81ySd2vVJpfjGgpN33aFUd1Y1a/xhwxrBPTLKgqt4+7pHkpRYZUNJofRP4lwBJfqeb3+rGJH/afQGZJI8k+cMk1wIvS/L1JMu7bad18zptTPLhqZ0mOT3JbUm+Qe+L1tJez4CSRqSbGmEFvRE6ngf8Nr1BXI8CHgfe1DU9ANhYVS+pqm/1Pf/Z9Abi/HXgKOCYJL/Zzd/1AXrBdCIwdHegNJ/N66GOpEbs1zdk1DfpjXO4EngxsK4bgmg/elOIQC+sLptmP8cAX6+q+wGSXERvLiUG1v8F8Jw5+D2kphhQ0u57tDtL2q4b5PPCqnrPNO1/XFWPT7N+uuljpjgmmSaOXXzS3Pgq8Ft9Iz//XJJfnOE51wKvS
HJwd73qNHozll4LHN9NErcP8Ia5LFxqhWdQ0hyoqluS/AG9WW6fQjfyM7DDacWr6t4k7wG+Ru9sam1VfQEgyfuB79KbYfV6eqP3S3s1RzOXJDXJLj5JUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDChJUpMMKElSkwwoSVKTDCjtUJJ3Jtmc5MEk5+yk3Q5neE2yNsmBc1dl+/qO40UD65cn+fgOnnNnkoP3TIXtSbI0ycZx1zGMSf+7mkuOxTdPLD3nyyMdk+rOD712ZyNnT/lPwIqqumO2r1NVr5ntc+fCkRceOdLjuOHfbZjVcUyysKrWA+tHWc+ceP8zRzse2vsfGuaYSZ5BaXpJVgG/BKxJ8u6pM6Qkb+hme70pydV9T3l2kiuS/G2SP+rbz53d6NxLu7OIP0uyKclVSfbr2hyT5OYk303ykfnyyXkYA8fxoSSrk1wFfDbJ8Um+1LU7qDsmNyT5U/qm3kjy10mu647bym7d25J8tK/N7yb5kz372825BYPvl+73XNe9/y5Lsj9AkguSfCrJ15LcnuQVSc7v3nMXTO2wm834w93x/N9Jju1mNL49ycldm32TfKab2fiGJK/s1i9I8sfd+puTvKO/2K6+K5L87h48Rns1A0rTqqozgHuAVwIP9m16L/DqqnohcHLf+qPozSB7JPDbSRZPs9vDgXOr6leAfwT+bbf+M8AZVfUyepP57TUGjuNH6U1ieEpVvXGg6fuAb1XVi4A1wJK+bf++ql4MLAfemeQg4FLg5G76DYDT6R3Hvcl075fLq+qY7v23GXhbX/tn0ZuN+N3AF+kd718BjkwyNV/XAfQmf3wx8DDw3+jNUvx64A+7NmcCVNWR9KY8uTDJvvQmoVwGvKiqXgD0d9k+rXvNi6vqz0Z3CCabAaVd9W3ggu5TYv+UD1+tqoeq6sfALcB0cx/dUVVTM89eByztrk89vaq+062/eK4Kb8Saqnp0mvXHAX8OUFVf5mc/FLwzyU3ANcBi4PCq+iHwN8DrkvwysE9VbZjb0ve4J71fgOcn+WaSDcCb6AXQlC9Wb3qGDcDfV9WGqnoC2NQ9F+Ax4Iru8QbgG1X1k+7xVJtfBf4nQFV9j94UKc8BfgNYVVXbum0/6HvtLwCfqarPjuD3VseA0i7pzgj+gN5/lDd2n+YB/rmv2eNMf31zujaTdj3ihzvZ9qRrPUmOp/cf48u6s4YbgH27zecBb2XvPHuC6d8vFwBndWc3H+Cnx6K//RMDz32Cn74ff1I/nWNoe7suyKba7Og9GXY8s/G3gRXdTMoaEQNKuyTJv6iqa6vqvcAD9IJq1qrqQeDhJC/tVp26uzXOU1fTOyMgyQp63VUAzwQerKofdWdKU8eJqrqW3vF/I3DJni13bJ4O3Nt1bb5pjl6j/+/iOfS6W28FrgLOSLKw2/Zzfc95L/APwCfnqKaJZEBpV32ku0i8kd4/5JtGsM+3AauTfJfep9SHRrDP+eYDwHFJrgdeBdzVrb8CWJjkZuCD9Lr5+n0O+HYX9JPgvwLXAl8BvjdHr/FJejdobAD+AnhrVf0zvTPWu4Cbuy7XweuI7wL27b9JSLvHGXU1dkmeVlWPdI/PAQ6pqrPHXNa80N0F+NGq+uq4a5FGzTMoteC1SW7szsp+jd6dVdqJJAcmuQ141HDS3sozKElSkzyDkiQ1yYCSJDXJgJIkNcmAkiQ1yYCSJDXJgJIkNcmAkiQ1yYCSJDXJgJIkNcmAkiQ16f8Dsw8h5Oz3l0AAAAAASUVORK5CYII=\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAaQAAAERCAYAAAA0S9PzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAZTUlEQVR4nO3de/xcdX3n8dcbghcCQS6RFiUGFaygha7p6mqp90vXG122VkEEW0sL1a1W67pdWRHYellv9YZlKxcvqLiioqhVQaxYV426QCM1ggJyCYSLIQk3jZ/945zoMP6SzI/ML/PN5PV8POaR35zznTOfnJnf7z3nO9/zPakqJEmatO0mXYAkSWAgSZIaYSBJkppgIEmSmmAgSZKaMG/SBQDssccetXjx4kmXIUmaY9/5zndurKqFM61rIpAWL17M0qVLJ12GJGmOJblyQ+vsspMkNcFAkiQ1wUCSJDXBQJIkNcFAkiQ1YaRASvLSJEuT3Jnk9E20fUWSFUlWJTk1yb3HUqkkaaqNeoR0LXAScOrGGiV5OvAa4MnAYuDBwOs3oz5J0jZipECqqrOr6lPATZtoeiTw/qpaVlW3ACcCR21eiZKkbcG4T4w9APj0wP2LgD2T7F5VdwuzJEcDRwMsWrRotK0fv8t4qgQ4ftUYt2Vds9uWdc1uW9Y1u21Z1+y21U5d4x7UsBMwWNH6n3ceblhVp1TVkqpasnDhjLNISJK2IeMOpDXAgoH7639ePebnkSRNmXEH0jLgwIH7BwLXD3fXSZI0bNRh3/OS3AfYHtg+yX2SzPT90weAP02yf5JdgdcCp4+tWknS1Br1COm1wO10Q7pf2P/82iSLkqxJsgigqr4AvBn4CnBlf3vd2KuWJE2dkUbZVdXxwPEbWL3TUNu3AW/brKokSdscpw6SJDXBQJIkNcFAkiQ1wUCSJDXBQJIkNcFAkiQ1wUCSJDXBQJIkNcFAkiQ1wUCSJDXBQJIkNcFAkiQ1wUCSJDXBQJIkNcFAkiQ1wUCSJDXBQJIkNcFAkiQ1wUCSJDXBQJIkNcFAkiQ1wUCSJDXBQJIkNcFAkiQ1wUCSJDXBQJIkNcFAkiQ1wUCSJDXBQJIkNcFAkiQ1YaRASrJbkk8mWZvkyiSHbaBdkpyU5Jokq5JckOSA8ZYsSZpGox4hvQe4C9gTOBw4eQNB80fAnwAHA7sB3wA+OIY6JUlTbpOBlGQ+cChwXFWtqaoLgXOAI2Zovg9wYVX9qKrWAR8C9h9nwZKk6TTKEdJ+wLqqWj6w7CJgpiOkjwIPTbJfkh2AI4EvzLTRJEcnWZpk6cqVK2dbtyRpyswboc1OwKqhZauAnWdoex3wNeAHwDrgJ8CTZtpoVZ0CnAKwZMmSGrFeSdKUGuUIaQ2wYGjZAmD1DG1fB/wusDdwH+D1wPlJdtycIiVJ02+UI6TlwLwk+1bVD/tlBwLLZmh7IPCxqrq6v396knfQfY+0dLOr1aw8cp9FY9vWJWPbkiTNbJNHSFW1FjgbOCHJ/CSPA57LzKPnvg38UZI9k2yX5AhgB+CycRYtSZo+oxwhARwLnArcANwEHFNVy5IsAr4P7F9VVwFvAu4P/D9gPl0QHVpVPx175ZKkqTJSIFXVzcAhMyy/im7Qw/r7dwB/2d+kGdmVKGkmTh0kSWqCgSRJaoKBJElqgoEkSWqCgSRJaoKBJElqgoEkSWqCgSRJaoKBJElqgoEkSWqCgSRJaoKBJElqwqizfTdh8R1njm1bV4xtS5KkcfAISZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUhK3qPCRpLj1yn0Vj29YlY9uStO3wCEmS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUhJECKcluST6ZZG2SK5MctpG2D07y2SSrk9yY5M3jK1eSNK1GnanhPcBdwJ7AQcC5SS6qqmWDjZLcC/hS3/6PgXXAfuMrV9r
2OIOEthWbPEJKMh84FDiuqtZU1YXAOcARMzQ/Cri2qt5WVWur6o6qunisFUuSptIoXXb7AeuqavnAsouAA2Zo+xjgiiSf77vrLkjyyHEUKkmabqN02e0ErBpatgrYeYa2DwSeCDwHOA/4K+DTSX6rqu4abJjkaOBogEWLxtclMQmL7zhzbNu6YmxbkqStyyhHSGuABUPLFgCrZ2h7O3BhVX2+D6C3ALsDDx9uWFWnVNWSqlqycOHCWZYtSZo2owTScmBekn0Hlh0ILJuh7cVAjaMwSdK2ZZOBVFVrgbOBE5LMT/I44LnAB2do/iHgMUmekmR74OXAjcClY6xZkjSFRh32fSxwKnADcBNwTFUtS7II+D6wf1VdVVU/SPJC4H3A/YHvAs8Z/v5I0tbP4egat5ECqapuBg6ZYflVdIMeBpedTXdEJUlbnEG59fIS5lLjVl/6xkmXsFVxf229nMtOktQEA0mS1AS77CTdI3aNadw8QpIkNcFAkiQ1wS47bXF29UiaiYEk9QxKabLsspMkNcFAkiQ1wUCSJDXBQJIkNcFAkiQ1wUCSJDXBYd+StAUsvuPMsW3rirFtqS0eIUmSmmAgSZKaYCBJkppgIEmSmmAgSZKaYCBJkppgIEmSmmAgSZKaYCBJkppgIEmSmmAgSZKaYCBJkppgIEmSmmAgSZKaYCBJkpowUiAl2S3JJ5OsTXJlksNGeMz5SSqJ11ySJG3SqGHxHuAuYE/gIODcJBdV1bKZGic5fBbbliRNSEsXDtzkEVKS+cChwHFVtaaqLgTOAY7YQPtdgNcBr97M2iRJ25BRuuz2A9ZV1fKBZRcBB2yg/d8BJwMrNrbRJEcnWZpk6cqVK0cqVpI0vUYJpJ2AVUPLVgE7DzdMsgR4HPCuTW20qk6pqiVVtWThwoWj1CpJmmKjBNIaYMHQsgXA6sEFSbYD3gv8VVX9fDzlSZK2FaME0nJgXpJ9B5YdCAwPaFgALAE+lmQF8O1++dVJDt7sSiVJU22TI+Gqam2Ss4ETkryEbpTdc4HHDjVdBew1cH9v4FvAowC/JJIkbdSoJ8YeC9wXuAH4CHBMVS1LsijJmiSLqrNi/Y1fhdD1VXXXHNQuSZoiI50rVFU3A4fMsPwqukEPMz3mCiCbU5wkadvh1EGSpCYYSJKkJhhIkqQmON/cFFt96RsnXYIkjcwjJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITDCRJUhMMJElSEwwkSVITRgqkJLsl+WSStUmuTHLYBtodmeQ7SW5NcnWSNyeZN96SJUnTaNQjpPcAdwF7AocDJyc5YIZ2OwIvB/YAHg08GXjVGOqUJE25TR69JJkPHAo8oqrWABcmOQc4AnjNYNuqOnng7jVJPgw8cYz1SpKm1ChHSPsB66pq+cCyi4CZjpCG/T6wbKYVSY5OsjTJ0pUrV46wKUnSNBslkHYCVg0tWwXsvLEHJXkxsAR4y0zrq+qUqlpSVUsWLlw4Sq2SpCk2yoCDNcCCoWULgNUbekCSQ4A3Ak+pqhvveXmSpG3FKEdIy4F5SfYdWHYgG+6Kewbwv4FnV9Ulm1+iJGlbsMlAqqq1wNnACUnmJ3kc8Fzgg8NtkzwJ+DBwaFV9a9zFSpKm16jDvo8F7gvcAHwEOKaqliVZlGRNkkV9u+OAXYDP9cvXJPn8+MuWJE2bkU5araqbgUNmWH4V3aCH9fcd4i1JukecOkiS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0mS1AQDSZLUBANJktQEA0m
S1AQDSZLUBANJktQEA0mS1AQDSZLUhJECKcluST6ZZG2SK5MctpG2r0iyIsmqJKcmuff4ypUkTatRj5DeA9wF7AkcDpyc5IDhRkmeDrwGeDKwGHgw8PqxVCpJmmqbDKQk84FDgeOqak1VXQicAxwxQ/MjgfdX1bKqugU4EThqjPVKkqbUKEdI+wHrqmr5wLKLgF87QuqXXTTUbs8ku9/zEiVJ24JU1cYbJAcDH6+q3xhY9mfA4VX1hKG2lwN/WVVf6O/vQNfVt09VXTHU9mjg6P7uw4AfbNb/5Ff2AG4c07bGybpmx7pmx7pmx7pmZ5x1PaiqFs60Yt4ID14DLBhatgBYPULb9T//WtuqOgU4ZYTnn5UkS6tqybi3u7msa3asa3asa3asa3a2VF2jdNktB+Yl2Xdg2YHAshnaLuvXDba7vqpuuuclSpK2BZsMpKpaC5wNnJBkfpLHAc8FPjhD8w8Af5pk/yS7Aq8FTh9jvZKkKTXqsO9jgfsCNwAfAY6pqmVJFiVZk2QRQP/d0ZuBrwBX9rfXjb/sjRp7N+CYWNfsWNfsWNfsWNfsbJG6NjmoQZKkLcGpgyRJTTCQJElNmJpAms18e1tSkpcmWZrkziSnT7oegCT3TvL+fj+tTvK9JH8w6boAknwoyXVJbk2yPMlLJl3ToCT7JrkjyYcmXQtAkgv6etb0t3Gdz7fZkjw/yaX97+Tl/TmNk6xnzdBtXZJ3TbKm9ZIsTvK5JLf0c4G+O8kop+XMdV0PT3J+PzfpZUn+cC6fb2oCiRHn25uAa4GTgFMnXciAecBPgMcDuwDHAWclWTzBmtZ7A7C4qhYAzwFOSvKoCdc06D3AtyddxJCXVtVO/e1hky4GIMlTgTcBLwZ2Bn4f+NEkaxrYRzvR/Z24Hfj4JGsa8F66QWO/CRxE97t57CQL6gPx08Bngd3oJjL4UJL95uo5pyKQZjnf3hZVVWdX1aeAZs7Fqqq1VXV8VV1RVb+oqs8CPwYm/oe/nwfxzvV3+9tDJljSLyV5PvBT4LxJ17IVeD1wQlX93/49dk1VXTPpogb8Z7oA+NqkC+ntA5xVVXdU1QrgC8w8PduW9FvAXsDbq2pdVZ0PfJ05/Ls6FYHE7Obb05Ake9Ltw5lOdt7ikrw3yW3AvwHXAZ+bcEkkWQCcALxy0rXM4A1Jbkzy9SRPmHQxSbYHlgAL+26eq/suqPtOurYBRwIfqHaGGf898PwkOyZ5APAHdKE0SdnAskfM1RNOSyDtBKwaWraKrqtAG9HPN/hh4Iyq+rdJ1wNQVcfSvXYH052UfefGH7FFnEg3k/1PJl3IkP9Kd5mXB9CdK/KZJJM+otwT2IHuKORgui6o36E7UX7i+vMmHw+cMelaBnyV7gP0rcDVwFLgUxOtqPtAeAPwN0l2SPI0uv2241w94bQE0mzm21MvyXZ0M27cBbx0wuXcTd9FcCHwQOCYSdaS5CDgKcDbJ1nHTKrqm1W1uqrurKoz6LpU/uOEy7q9//ddVXVdVd0IvI3J17Xei4ALq+rHky4Efvl7+E90H77m001kuivdd3ATU1U/Aw4BngmsoOsdOIsuMOfEtATSbObbE5AkwPvpPs0e2r/5WjSPyX+H9AS6C05elWQF8Crg0CTfnWRRG1DM3NWy5QroroV2dV9Li15EW0dHuwF7A+/uP1jcBJxGAwFeVRdX1eOraveqejrd0fi35ur5piKQZjnf3haVZF6S+wDbA9snuU8LwzmBk4GHA8+uqts31XhLSHL/fqjwTkm2769A/ALg/AmXdgpdKB7U394HnAs8fZJFJblfkqevf08lOZxuNNs/TbKu3mnAy/rXdFfg5XSjtSYqyWPpujdbGV1HfwT5Y+CY/nW8H913XBdt/JFzL8lv9++vHZO8im4U4Olz9XxTEUi9Gefbm2xJQNdvfjvdpd1f2P880b70JA8C/pzuj+uKgfMyDp9kXXSfqI+h+3R9C/AW4OVV9emJFlV1W1WtWH+j6yK
+o6pWTrIuuu9pTgJW0l2r5mXAIVXVwrlIJ9INj18OXAp8D/ifE62ocyRwdlW11p3/n4Bn0L2WlwE/B14x0Yo6R9ANLLoBeDLw1IFRsGPnXHaSpCZM0xGSJGkrZiBJkppgIEmSmmAgSZKaYCBJkppgIEmSmmAgSROS5PAkX7yHjz2+lWsySeNiIEmzlOSKJLf3JxNfn+S0JDvNdjtV9eGqetpc1ChtjQwk6Z55dn+ht38H/C6znH2jkemjpKYYSNJm6C8693ngEUl26S8Nf12Sa5Kc1F8biCRH9dcrenuSm4Hj+2UXrt9Wkscm+XZ/uehv9/OurV+3T5Kvprvk/JfoZoSWpoqBJG2GJHvTzcr8PboZpH8OPJTu+j9PA14y0PzRdJfxvj9D87ol2Y1uwtZ3ArvTXa7h3CS7903OBL5DF0Qn0s3JJk0Vuw2ke+ZTSX5OdyHIc4F/pJtI9H797Olrk7wdOBr4h/4x11bVu/qff95dAeSXngn8sKrWz1D/kST/BXh2kvPpugWf0k9s+c9JPjOX/zlpEgwk6Z45pKq+vP5Okn9PN/v2dQNBsx0weIXZjV1tdi/gyqFlV9JdKmEv4Jb+MiuD6/a+Z6VLbTKQpPH4Cd2l1veoqp9voM3Gpta/FnjQ0LJFwBfopv/fNcn8gVBatIntSVsdv0OSxqCqrgO+CLw1yYIk2yV5SJLHj7iJzwH7JTmsv0jbHwP7A5+tqiuBpcDrk9wrye8Bz56T/4g0QQaSND4vAu4FfJ/uAoP/h+4Km5vUX7b6WcArgZuAVwPP6q8mCnAY3aCIm4HXAR8Ya+VSA7xA35RJ8jDgo3QjveYDr6uqEzfxmCuAlwx+JzKw7mDgH6vqYXNQ7tQaeh3+e1W9c2j9+4BrNvTaJClg36q6bM6LnRIbex9vbZIcDzy0ql446Vq2JL9DGpPFrzl3TpP9ijc+M5tuBXSfrC+oqt8Zx/NW1deArSKMHnnGI+f0NbjkyEtGfQ1gE69DVf3FeKqasON3mdtPtMevms0+11bOLrvp8yBg2aSL0IZfh/Uny0q6OwNpivTnqzwReHc/z9qZSU7q1+2R5LNJfprk5iRfSzL4+h+U5OJ+loCPJblP/7gnJLl64DmuSPKqmdr261/dz1RwbZKXJKkkD91Cu6AJG3gdTk7yuSRrgScmOX39a9M/5m8G9tufDG3vmUm+l+TWJD/pu3PWrzs3ycuG2l+c5JC5/V8269fex0l27d/7K5Pc0v/8wPUPSHJBP6vGv/Sv12eS7J7kw/0+/3aSxQPtK8mxSX7Yz5xxYj+A5Rt9+7OS3Gug/Z8luaz/vTsnyV4D6w5I8qV+3fVJ/nb4P5RkhyQfSfKJwe1OIwNpilTVk4CvAS/t51m7a2D1K4GrgYXAnsDfcvdhw88DngHsA/w2cNRGnmrGtkmeAfw18BS6705GHWE2VTbwOhxGNzvDzsCFg+37/fYq4KnAvnT7b9BaugET96M7gfaYgcA5A/jl9wxJDqQ7d+lz4/1fbTVmem9uB5xGd9S6CLgdePfQ454PHEG37x4CfKN/zG7ApXQDSQY9A3gU8Bi67tlTgMPpzg17BPACgCRPAt7Q1/WbdOePfbRftzPwZbqh/XvR/c6cN/gkSe4LfIrulILnVdXg7/TUMZC2HT+j+4V4UFX9rKq+Vncf0fLOqrq2qm4GPgMctJFtbajt84DTqmpZVd0GvH4O/h9bq09X1der6hdVdcfQuvX77V/784yOH1xZVRdU1SX9Yy8GPsKvwv7TwL5J9u3vHwF8bNr/cG3Er703q+qmqvpEVd1WVavpPhgMf1g6raour6pVdHMTXl5VX+7PKfs43VRQg95UVbdW1TLgX4EvVtWPBh6/vv3hwKlV9d1+lo3/BvyH/ojrWcCKqnprVd1RVaur6psDz7GALqwuB15cVevGsYNaZiBtO/4XcBnwxSQ/SvKaofUrBn6+DdjY5RQ21HYvRp+ZYFuzqVkaBtffbcaGJI9O8pW
+y2kV8Bf0k6v2f+TOAl7Yd8G+APgg265fe28m2THJPyS5MsmtwD8D9xv6Lu/6gZ9vn+H+8O/DqO3vNgNHVa2hG9b/ALqjqcs38n95DN1R3huHPjxOLQNpG9F/+nplVT2Y7qTKv07y5DE/zXXAAwfuO7XNr2zsD8p13H1fLRpafyZwDrB3Ve0CvA8YHH12Bt0n8ScDt1XVNza/3KnySrqRoo+uqgXA7/fLt8QIvrvNwJFkPt3kudfQfQh5yEYe+0W67r7zkuw5l0W2wkDaRiR5VpKHJglwK7Cuv43TWcCLkzw8yY7A/xjz9qfVWcBRSfbv99vw9xU7AzdX1R3p5sw7bHBlH0C/AN7Ktn10tCE70x21/DTdrOrD+3cunUn3O3FQknsDfwd8s6quAD4L/EaSlye5d5Kdkzx68MFV9eZ+G+clmfpLjnge0pjM4jyhSdmX7ovchXSzCLy3qi4Y5xNU1eeTvBP4Ct0fyBPpvtO4c5zPsyGzPE+oGf1+ewdwPt1+ey3dEc96x9JNSfRu4Kt0AXa/oc18gG5/b9nRdVvHeULvoPujfiPdEctb2UL7qarOS3Ic8AlgV+Bf6AZQUFWrkzwV+Hu6kLyzr/WbQ9s4sQ+zLyd5Uv/92FRypgbNmSQPp/vC994bmXBUY5DkRcDRVfV7k65FuqfsstNYJfnDdBOA7gq8CfiMYTS3+m6+Y+mGHktbLQNJ4/bnwEq60UPrgGMmW850S/J0uv19PV23lLTVsstOktQEj5AkSU0wkCRJTTCQJElNMJAkSU0wkCRJTTCQJElNMJAkSU0wkCRJTTCQJElNMJAkSU34/1wiVnhwjQEiAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] @@ -1228,7 +1345,7 @@ "crit_func = rp.get_crit_func(params, options, df)\n", "crit_func(params)\n", "\n", - "constr = rp.get_parameter_constraints(\"robinson\")" + "constr = rp.get_parameter_constraints(\"robinson_crusoe\")" ] }, { @@ -1287,7 +1404,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/docs/index.rst b/docs/index.rst index e9019178d..0ed3579c8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -15,7 +15,7 @@ baseline model presented in: Evidence `_. *The Review of Economics and Statistics*, 76(4): 648-672. -.. image:: https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000 +.. image:: https://img.shields.io/badge/License-MIT-yellow.svg .. toctree:: :maxdepth: 1 diff --git a/docs/software/reliability.rst b/docs/software/reliability.rst index ab2c65bc4..f7ed0ef9d 100644 --- a/docs/software/reliability.rst +++ b/docs/software/reliability.rst @@ -53,8 +53,8 @@ implementation always remain. So, if you are struggling with a particularly poor performance in your application, please do not hesitate to let us know so we can help with the investigation. -For more details, see the script `online `_. The results for all the +For more details, see the script `online `_. The results for all the parameterizations analyzed in Keane and Wolpin (1994) are available `here -`_. +`_. 
diff --git a/environment.yml b/environment.yml index a7cf8eb41..224d54e94 100644 --- a/environment.yml +++ b/environment.yml @@ -6,7 +6,6 @@ dependencies: - python=3.7 - pip - anaconda-client - - bump2version - chaospy - click - codecov @@ -14,6 +13,7 @@ dependencies: - conda-verify - doc8 - estimagic>=0.0.14 + - joblib - jupyterlab - line_profiler - matplotlib @@ -37,4 +37,5 @@ dependencies: - tox-conda - pip: - apprise + - bump2version - pytest-randomly diff --git a/meta.yaml b/meta.yaml index 025abbd3f..c17597d41 100644 --- a/meta.yaml +++ b/meta.yaml @@ -20,6 +20,7 @@ requirements: - chaospy - click - estimagic>=0.0.12 + - joblib - mkl - numba>=0.42 - numpy diff --git a/respy/__init__.py b/respy/__init__.py index 2e8165634..d7cbacf22 100755 --- a/respy/__init__.py +++ b/respy/__init__.py @@ -21,7 +21,7 @@ from respy.method_of_simulated_moments import get_flat_moments # noqa: F401 from respy.method_of_simulated_moments import get_msm_func # noqa: F401 from respy.simulate import get_simulate_func # noqa: F401 -from respy.solve import solve # noqa: F401 +from respy.solve import get_solve_func # noqa: F401 from respy.tests.random_model import add_noise_to_params # noqa: F401 # We only maintain the code base for Python >= 3.6. diff --git a/respy/config.py b/respy/config.py index 0e0ccee00..b250e926d 100644 --- a/respy/config.py +++ b/respy/config.py @@ -34,19 +34,16 @@ # Some assert functions take rtol instead of decimals TOL_REGRESSION_TESTS = 1e-10 -# Interpolation +# Penalty for states which cannot be reached. 
INADMISSIBILITY_PENALTY = -400_000 SEED_STARTUP_ITERATION_GAP = 100 -IS_DEBUG = False - DEFAULT_OPTIONS = { "estimation_draws": 200, "estimation_seed": 1, "estimation_tau": 500, "interpolation_points": -1, - "n_periods": 40, "simulation_agents": 1000, "simulation_seed": 2, "solution_draws": 200, diff --git a/respy/interpolate.py b/respy/interpolate.py new file mode 100644 index 000000000..4b0cfd851 --- /dev/null +++ b/respy/interpolate.py @@ -0,0 +1,330 @@ +"""This module contains the code for approximate solutions to the DCDP.""" +import warnings + +import numba as nb +import numpy as np + +from respy.config import MAX_LOG_FLOAT +from respy.parallelization import parallelize_across_dense_dimensions +from respy.shared import calculate_expected_value_functions +from respy.shared import calculate_value_functions_and_flow_utilities + + +@parallelize_across_dense_dimensions +def interpolate( + wages, + nonpecs, + continuation_values, + is_inadmissible, + period_draws_emax_risk, + interpolation_points, + optim_paras, + options, +): + """Interface to switch between different interpolation routines.""" + period_expected_value_functions = _kw_94_interpolation( + wages, + nonpecs, + continuation_values, + is_inadmissible, + period_draws_emax_risk, + interpolation_points, + optim_paras, + options, + ) + + return period_expected_value_functions + + +def _kw_94_interpolation( + wages, + nonpecs, + continuation_values, + is_inadmissible, + period_draws_emax_risk, + interpolation_points, + optim_paras, + options, +): + r"""Calculate the approximate solution proposed by [1]_. + + The authors propose an interpolation method to alleviate the computation burden of + the full solution. The full solution calculates the expected value function with + Monte-Carlo simulation for each state in the state space for a pre-defined number of + points. Both, the number of states and points, have a huge impact on runtime. 
+ + [1]_ propose an interpolation method to alleviate the computation burden. The + general idea is to calculate the expected value function with Monte-Carlo simulation + only for a much smaller number of states and predict the remaining expected value + functions with a linear model. The linear model is + + .. math:: + + EVF = MaxeVF + \sum^{n-1}_{i=0} MaxeVF - eVF_i + + \sum^{n-1}_{i=0} \sqrt{MaxeVF - eVF_i} + + where :math:`EVF` are the expected value functions generated by the Monte-Carlo + simulation, :math:`eVF_i` are the value functions generated with the expected value + of the shocks, and :math:`MaxeVF` is their maximum over all :math:`i`. + + The expected value of the shocks is zero for non-working alternatives. For working + alternatives, the shocks are log normally distributed and cannot be set to zero, but + :math:`E(X) = \exp\{\mu + \frac{\sigma^2}{2}\}` where :math:`\mu = 0`. + + After experimenting with various functions for :math:`g()`, the authors include + simple differences and the square root of the simple differences in the equation. + + The function consists of the following steps. + + 1. Create an indicator for whether the expected value function of the state is + calculated with Monte-Carlo simulation or interpolation. + + 2. Compute the expected value of the shocks. + + 3. Compute the right-hand side variables of the linear model. + + 4. Compute the left-hand side variables of the linear model by Monte-Carlo + simulation on subset of states. + + 5. Fit the linear model with ordinary least squares on the subset without + interpolation and predict the expected value functions for all other states. + + References + ---------- + .. [1] Keane, M. P. and Wolpin, K. I. (1994). `The Solution and Estimation of + Discrete Choice Dynamic Programming Models by Simulation and Interpolation: + Monte Carlo Evidence `_. *The Review of + Economics and Statistics*, 76(4): 648-672. 
+ + """ + n_wages = len(optim_paras["choices_w_wage"]) + n_core_states_in_period = wages.shape[0] + + not_interpolated = _get_not_interpolated_indicator( + interpolation_points, + n_core_states_in_period, + next(options["solution_seed_iteration"]), + ) + + # Create an array with the expected value of the shocks. + expected_shocks = np.zeros(len(optim_paras["choices"])) + var = np.diag(optim_paras["shocks_cholesky"].dot(optim_paras["shocks_cholesky"].T)) + expected_shocks[:n_wages] = np.exp(np.clip(var[:n_wages], 0, MAX_LOG_FLOAT) / 2) + + exogenous, max_emax = _compute_rhs_variables( + wages, + nonpecs, + continuation_values, + expected_shocks, + optim_paras["delta"], + is_inadmissible, + ) + + endogenous = _compute_lhs_variable( + wages, + nonpecs, + continuation_values, + max_emax, + not_interpolated, + period_draws_emax_risk, + optim_paras["delta"], + is_inadmissible, + ) + + # Create prediction model based on the random subset of points where the EMAX is + # actually simulated and thus dependent and independent variables are available. For + # the interpolation points, the actual values are used. + period_expected_value_functions = _predict_with_linear_model( + endogenous, exogenous, max_emax, not_interpolated + ) + + return period_expected_value_functions + + +def _get_not_interpolated_indicator(interpolation_points, n_states, seed): + """Get indicator for states which will not be interpolated. + + Parameters + ---------- + interpolation_points : int + Number of states which are drawn at random and will not be interpolated. + n_states : int + Total number of states in period. + seed : int + Seed to set randomness. + + Returns + ------- + not_interpolated : numpy.ndarray + Array of shape (n_states,) indicating states which will not be interpolated. 
+ + """ + np.random.seed(seed) + + indices = np.random.choice(n_states, size=interpolation_points, replace=False) + not_interpolated = np.zeros(n_states, dtype="bool") + not_interpolated[indices] = True + + return not_interpolated + + +def _compute_rhs_variables(wages, nonpec, emaxs, draws, delta, is_inadmissible): + """Compute right-hand side variables of the linear model. + + Constructing the exogenous variable for all states, including the ones where + simulation will take place. All information will be used in either the construction + of the prediction model or the prediction step. + + Parameters + ---------- + wages : numpy.ndarray + Array with shape (n_states_in_period, n_choices). + nonpec : numpy.ndarray + Array with shape (n_states_in_period, n_choices). + emaxs : numpy.ndarray + Array with shape (n_states_in_period, n_choices). + draws : numpy.ndarray + Array with shape (n_choices,). + delta : float + Discount factor. + is_inadmissible : numpy.ndarray + Array with shape (n_states_in_period, n_choices) containing an indicator for + whether the state in the next period is not admissible. + + Returns + ------- + exogenous : numpy.ndarray + Array with shape (n_states_in_period, n_choices * 2 + 1) where the last column + contains the constant. + max_value_functions : numpy.ndarray + Array with shape (n_states_in_period,) containing maximum over all value + functions computed with the expected value of shocks. 
+ + """ + value_functions, _ = calculate_value_functions_and_flow_utilities( + wages, nonpec, emaxs, draws, delta, is_inadmissible + ) + + max_value_functions = value_functions.max(axis=1) + exogenous = max_value_functions.reshape(-1, 1) - value_functions + + exogenous = np.column_stack( + (exogenous, np.sqrt(exogenous), np.ones(exogenous.shape[0])) + ) + + return exogenous, max_value_functions + + +def _compute_lhs_variable( + wages, + nonpec, + continuation_values, + max_value_functions, + not_interpolated, + draws, + delta, + is_inadmissible, +): + """Calculate left-hand side variable for all states which are not interpolated. + + The function computes the full solution for a subset of states. Then, the dependent + variable is the expected value function minus the maximum of value function with the + expected shocks. + + Parameters + ---------- + wages : numpy.ndarray + Array with shape (n_states_in_period, n_choices). + nonpec : numpy.ndarray + Array with shape (n_states_in_period, n_choices). + continuation_values : numpy.ndarray + Array with shape (n_states_in_period, n_choices). + max_value_functions : numpy.ndarray + Array with shape (n_states_in_period,) containing maximum over all value + functions computed with the expected value of shocks. + not_interpolated : numpy.ndarray + Array with shape (n_states_in_period,) containing indicators for simulated + continuation_values. + draws : numpy.ndarray + Array with shape (n_draws, n_choices) containing draws. + delta : float + Discount factor. + is_inadmissible : numpy.ndarray + Array with shape (n_states_in_period, n_choices) containing an indicator for + whether the state in the next period is not admissible. 
+ + """ + expected_value_functions = calculate_expected_value_functions( + wages[not_interpolated], + nonpec[not_interpolated], + continuation_values[not_interpolated], + draws, + delta, + is_inadmissible[not_interpolated], + ) + endogenous = expected_value_functions - max_value_functions[not_interpolated] + + return endogenous + + +def _predict_with_linear_model( + endogenous, exogenous, max_value_functions, not_interpolated +): + """Predict the expected value function for interpolated states with a linear model. + + The linear model is fitted with ordinary least squares. Then, predict the expected + value function for all interpolated states and use the computed expected value + functions for the remaining states. + + Parameters + ---------- + endogenous : numpy.ndarray + Array with shape (num_simulated_states_in_period,) containing the expected value + functions minus the maximum for the states used to interpolate the rest. + exogenous : numpy.ndarray + Array with shape (n_states_in_period, n_choices * 2 + 1) containing exogenous + variables. + max_value_functions : numpy.ndarray + Array with shape (n_states_in_period,) containing maximum over all value + functions computed with the expected value of shocks. + not_interpolated : numpy.ndarray + Array with shape (n_states_in_period,) containing indicator for states which + are not interpolated and used to estimate the coefficients for the + interpolation. + + """ + beta = ols(endogenous, exogenous[not_interpolated]) + + endogenous_predicted = exogenous.dot(beta) + endogenous_predicted = np.clip(endogenous_predicted, 0, None) + + predictions = endogenous_predicted + max_value_functions + predictions[not_interpolated] = endogenous + max_value_functions[not_interpolated] + + if not np.all(np.isfinite(beta)): + warnings.warn("OLS coefficients in the interpolation are not finite.") + + return predictions + + +@nb.njit +def ols(y, x): + """Calculate the coefficients of a linear model with OLS using a pseudo-inverse. 
+ + Parameters + ---------- + x : numpy.ndarray + Array with shape (n_observations, n_independent_variables) containing the + independent variables. + y : numpy.ndarray + Array with shape (n_observations,) containing the dependent variable. + + Returns + ------- + beta : numpy.ndarray + Array with shape (n_independent_variables,) containing the coefficients of the + linear model. + + """ + beta = np.dot(np.linalg.pinv(x.T.dot(x)), x.T.dot(y)) + return beta diff --git a/respy/likelihood.py b/respy/likelihood.py index 599227ad6..397a3ab15 100644 --- a/respy/likelihood.py +++ b/respy/likelihood.py @@ -8,20 +8,25 @@ from scipy import special from respy.conditional_draws import create_draws_and_log_prob_wages +from respy.config import INDEXER_INVALID_INDEX from respy.config import MAX_FLOAT from respy.config import MIN_FLOAT +from respy.parallelization import distribute_and_combine_likelihood +from respy.parallelization import parallelize_across_dense_dimensions from respy.pre_processing.data_checking import check_estimation_data from respy.pre_processing.model_processing import process_params_and_options +from respy.pre_processing.process_covariates import identify_necessary_covariates from respy.shared import aggregate_keane_wolpin_utility +from respy.shared import cast_bool_to_numeric +from respy.shared import compute_covariates from respy.shared import convert_labeled_variables_to_codes -from respy.shared import create_base_covariates from respy.shared import create_base_draws from respy.shared import create_core_state_space_columns +from respy.shared import create_dense_state_space_columns from respy.shared import downcast_to_smallest_dtype from respy.shared import generate_column_dtype_dict_for_estimation from respy.shared import rename_labels_to_internal -from respy.solve import solve_with_backward_induction -from respy.state_space import StateSpace +from respy.solve import get_solve_func def get_crit_func( @@ -66,7 +71,8 @@ def get_crit_func( 
check_estimation_data(df, optim_paras) - state_space = StateSpace(optim_paras, options) + solve = get_solve_func(params, options) + state_space = solve.keywords["state_space"] df, type_covariates = _process_estimation_data( df, state_space, optim_paras, options @@ -86,7 +92,7 @@ def get_crit_func( log_like, df=df, base_draws_est=base_draws_est, - state_space=state_space, + solve=solve, type_covariates=type_covariates, options=options, return_scalar=return_scalar, @@ -100,7 +106,7 @@ def log_like( params, df, base_draws_est, - state_space, + solve, type_covariates, options, return_scalar, @@ -119,25 +125,27 @@ def log_like( different types. base_draws_est : numpy.ndarray Set of draws to calculate the probability of observed wages. - state_space : :class:`~respy.state_space.StateSpace` - State space. + solve : :func:`~respy.solve.solve` + Function which solves the model with new parameters. options : dict Contains model options. """ optim_paras, options = process_params_and_options(params, options) - state_space.update_systematic_rewards(optim_paras) - - state_space = solve_with_backward_induction(state_space, optim_paras, options) + state_space = solve(params) - contribs, df = _internal_log_like_obs( + contribs, df, log_type_probabilities = _internal_log_like_obs( state_space, df, base_draws_est, type_covariates, optim_paras, options ) + # Return mean log likelihood or log likelihood contributions. 
out = contribs.mean() if return_scalar else contribs + if return_comparison_plot_data: - comparison_plot_data = _create_comparison_plot_data(df, optim_paras) + comparison_plot_data = _create_comparison_plot_data( + df, log_type_probabilities, optim_paras + ) out = (out, comparison_plot_data) return out @@ -184,15 +192,74 @@ def _internal_log_like_obs( """ df = df.copy() + n_types = optim_paras["n_types"] + + wages = state_space.get_attribute("wages") + nonpecs = state_space.get_attribute("nonpecs") + expected_value_functions = state_space.get_attribute("expected_value_functions") + is_inadmissible = state_space.get_attribute("is_inadmissible") + + df = _compute_wage_and_choice_likelihood_contributions( + df, + base_draws_est, + wages, + nonpecs, + expected_value_functions, + is_inadmissible, + optim_paras=optim_paras, + options=options, + ) + + # Aggregate choice probabilities and wage densities to log likes per observation. + loglikes = ( + df.groupby(["identifier", "period", "type"])[["loglike_choice", "loglike_wage"]] + .first() + .unstack("type") + if optim_paras["n_types"] >= 2 + else df[["loglike_choice", "loglike_wage"]] + ) + per_observation_loglikes = loglikes["loglike_choice"] + loglikes["loglike_wage"] + per_individual_loglikes = per_observation_loglikes.groupby("identifier").sum() + + if n_types >= 2: + # To not alter the attribute in the functools.partial, create a copy. + type_covariates = type_covariates.copy() + # Weight each type-specific individual log likelihood with the type probability. 
+ log_type_probabilities = _compute_log_type_probabilities( + type_covariates, optim_paras, options + ) + weighted_loglikes = per_individual_loglikes + log_type_probabilities + + contribs = special.logsumexp(weighted_loglikes, axis=1) + else: + contribs = per_individual_loglikes.to_numpy().flatten() + log_type_probabilities = None + + contribs = np.clip(contribs, MIN_FLOAT, MAX_FLOAT) + + return contribs, df, log_type_probabilities + + +@distribute_and_combine_likelihood +@parallelize_across_dense_dimensions +def _compute_wage_and_choice_likelihood_contributions( + df, + base_draws_est, + wages, + nonpecs, + expected_value_functions, + is_inadmissible, + optim_paras, + options, +): n_choices = len(optim_paras["choices"]) n_obs = df.shape[0] - n_types = optim_paras["n_types"] - indices = df[[f"index_type_{i}" for i in range(optim_paras["n_types"])]].to_numpy() + indices = df["index"].to_numpy() - wages_systematic = state_space.wages[indices].reshape(n_obs * n_types, n_choices) - log_wages_observed = df["log_wage"].to_numpy().repeat(n_types) - choices = df["choice"].to_numpy().repeat(n_types) + wages_systematic = wages[indices] + log_wages_observed = df["log_wage"].to_numpy() + choices = df["choice"].to_numpy() draws, wage_loglikes = create_draws_and_log_prob_wages( log_wages_observed, @@ -205,61 +272,64 @@ def _internal_log_like_obs( optim_paras["is_meas_error"], ) - draws = draws.reshape(n_obs, n_types, -1, n_choices) + draws = draws.reshape(n_obs, -1, n_choices) - continuation_values = state_space.get_continuation_values(indices=indices) + # To get the continuation values, correctly index the expected value functions. This + # is the same operation done in `_SingleDimStateSpace.get_continuation_values()`. 
+ child_indices = df[[f"child_index_{c}" for c in optim_paras["choices"]]] + mask = child_indices != INDEXER_INVALID_INDEX + valid_indices = np.where(mask, child_indices, 0) + continuation_values = np.where(mask, expected_value_functions[valid_indices], 0) choice_loglikes = _simulate_log_probability_of_individuals_observed_choice( - state_space.wages[indices], - state_space.nonpec[indices], + wages[indices], + nonpecs[indices], continuation_values, draws, optim_paras["delta"], - state_space.is_inadmissible[indices], - choices.reshape(-1, n_types), + is_inadmissible[indices], + choices, options["estimation_tau"], ) - wage_loglikes = wage_loglikes.reshape(n_obs, n_types) - - choice_loglikes = np.clip(choice_loglikes, MIN_FLOAT, MAX_FLOAT) - wage_loglikes = np.clip(wage_loglikes, MIN_FLOAT, MAX_FLOAT) + df["loglike_choice"] = np.clip(choice_loglikes, MIN_FLOAT, MAX_FLOAT) + df["loglike_wage"] = np.clip(wage_loglikes, MIN_FLOAT, MAX_FLOAT) - choice_cols = [f"loglike_choice_type_{i}" for i in range(n_types)] - wage_cols = [f"loglike_wage_type_{i}" for i in range(n_types)] + return df - df = df.reindex(columns=df.columns.tolist() + choice_cols + wage_cols) - df[choice_cols] = choice_loglikes - df[wage_cols] = wage_loglikes - data = df[choice_cols].to_numpy() + df[wage_cols].to_numpy() - per_individual_loglikes = ( - pd.DataFrame(data=data, index=df.index).groupby("identifier").sum() - ) +def _compute_log_type_probabilities(df, optim_paras, options): + dense_columns = create_dense_state_space_columns(optim_paras) + dense_columns.remove("type") - if n_types >= 2: - z = () + if dense_columns: + x_betas = df.groupby(dense_columns, as_index=False).apply( + _compute_x_beta_for_type_probability, optim_paras, options + ) + else: + x_betas = _compute_x_beta_for_type_probability(df, optim_paras, options) - for level in optim_paras["type_prob"]: - labels = optim_paras["type_prob"][level].index - x_beta = np.dot(type_covariates[labels], optim_paras["type_prob"][level]) + 
probabilities = special.softmax(x_betas, axis=1) - z += (x_beta,) + probabilities = np.clip(probabilities, 1 / MAX_FLOAT, None) + log_probabilities = np.log(probabilities) - type_probabilities = special.softmax(np.column_stack(z), axis=1) + return log_probabilities - type_probabilities = np.clip(type_probabilities, 1 / MAX_FLOAT, None) - log_type_probabilities = np.log(type_probabilities) - weighted_loglikes = per_individual_loglikes + log_type_probabilities +def _compute_x_beta_for_type_probability(df, optim_paras, options): + for type_ in range(optim_paras["n_types"]): + first_observations = df.copy().assign(type=type_) + relevant_covariates = identify_necessary_covariates( + optim_paras["type_prob"][type_].index, options["covariates_all"] + ) + first_observations = compute_covariates(first_observations, relevant_covariates) + first_observations = cast_bool_to_numeric(first_observations) - contribs = special.logsumexp(weighted_loglikes, axis=1) - else: - contribs = per_individual_loglikes.to_numpy().flatten() + labels = optim_paras["type_prob"][type_].index + df[type_] = np.dot(first_observations[labels], optim_paras["type_prob"][type_]) - contribs = np.clip(contribs, MIN_FLOAT, MAX_FLOAT) - - return contribs, df + return df[range(optim_paras["n_types"])] @nb.njit @@ -402,8 +472,10 @@ def _process_estimation_data(df, state_space, optim_paras, options): The DataFrame which contains the data used for estimation. The DataFrame contains individual identifiers, periods, experiences, lagged choices, choices in current period, the wage and other observed data. - state_space : ~respy.state_space.StateSpace + indexer : numpy.ndarray + Indexer for the core state space. optim_paras : dict + options : dict Returns ------- @@ -433,25 +505,18 @@ def _process_estimation_data(df, state_space, optim_paras, options): df = convert_labeled_variables_to_codes(df, optim_paras) # Get indices of states in the state space corresponding to all observations for all - # types. 
The indexer has the shape (n_observations, n_types). + # types. The indexer has the shape (n_observations,). n_periods = int(df.index.get_level_values("period").max() + 1) - indices = () + indices = [] + core_columns = create_core_state_space_columns(optim_paras) for period in range(n_periods): period_df = df.query("period == @period") - - core_columns = create_core_state_space_columns(optim_paras) period_core = tuple(period_df[col].to_numpy() for col in core_columns) + period_indices = state_space.indexer[period][period_core] + indices.append(period_indices) - period_observables = tuple( - period_df[observable].to_numpy() - for observable in optim_paras["observables"] - ) - - period_indices = state_space.indexer[period][period_core + period_observables] - indices += (period_indices,) - - indices = np.concatenate(indices).reshape(-1, optim_paras["n_types"]) + indices = np.concatenate(indices) # The indexer is now sorted in period-individual pairs whereas the estimation needs # individual-period pairs. Sort it! @@ -461,28 +526,27 @@ def _process_estimation_data(df, state_space, optim_paras, options): .sort_values(["identifier", "period"])["__index__"] .to_numpy() ) - indices = indices[indices_to_reorder] + df["index"] = indices[indices_to_reorder] - # Finally, add the indices to the DataFrame. - type_index_cols = [f"index_type_{i}" for i in range(optim_paras["n_types"])] - df = df.reindex(columns=df.columns.tolist() + type_index_cols) - df[type_index_cols] = indices + # Add indices of child states to the DataFrame. + children = pd.DataFrame( + data=state_space.indices_of_child_states[df["index"].to_numpy()], + index=df.index, + columns=[f"child_index_{c}" for c in optim_paras["choices"]], + ) + df = pd.concat([df, children], axis="columns") + # For the estimation, log wages are needed with shape (n_observations, n_types). 
df["log_wage"] = np.log(np.clip(df.wage.to_numpy(), 1 / MAX_FLOAT, MAX_FLOAT)) df = df.drop(columns="wage") # For the type covariates, we only need the first observation of each individual. if optim_paras["n_types"] >= 2: - initial_states = df.query("period == 0") - covariates = create_base_covariates( - initial_states, options["covariates"], raise_errors=False - ) - - all_data = pd.concat([covariates, initial_states], axis="columns") - - type_covariates = all_data[optim_paras["type_covariates"]].apply( - downcast_to_smallest_dtype + initial_states = df.query("period == 0").copy() + type_covariates = compute_covariates( + initial_states, options["covariates_core"], raise_errors=False ) + type_covariates = type_covariates.apply(downcast_to_smallest_dtype) else: type_covariates = None @@ -521,7 +585,7 @@ def _adjust_optim_paras_for_estimation(optim_paras, df): return optim_paras -def _create_comparison_plot_data(df, optim_paras): +def _create_comparison_plot_data(df, log_type_probabilities, optim_paras): """Create DataFrame for estimagic's comparison plot.""" # During the likelihood calculation, the log likelihood for missing wages is # substituted with 0. Remove these log likelihoods to get the correct picture. 
@@ -540,4 +604,18 @@ def _create_comparison_plot_data(df, optim_paras): df["type"] = splitted_label[3] df = df.drop(columns="variable") + if log_type_probabilities is not None: + log_type_probabilities = log_type_probabilities.reset_index().melt( + id_vars=["identifier", "period"] + ) + log_type_probabilities["kind"] = "log_type_probability" + log_type_probabilities["type"] = ( + log_type_probabilities["variable"] + .str.split("_", expand=True)[3] + .astype(int) + ) + log_type_probabilities = log_type_probabilities.drop(columns="variable") + + df = df.append(log_type_probabilities, sort=False) + return df diff --git a/respy/parallelization.py b/respy/parallelization.py new file mode 100644 index 000000000..31dbeefb3 --- /dev/null +++ b/respy/parallelization.py @@ -0,0 +1,219 @@ +"""This module contains the code to control parallel execution.""" +import functools + +import joblib +import numpy as np +import pandas as pd + +from respy.shared import create_dense_state_space_columns + + +def parallelize_across_dense_dimensions(func=None, *, n_jobs=1): + """Parallelizes decorated function across dense state space dimensions. + + Parallelization is only possible if the decorated function has no side-effects to + other dense dimensions. This might be true for different levels. E.g. + :meth:`respy.solve._create_choice_rewards` can be directly + parallelized. :func:`respy.solve.solve_with_backward_induction` cannot be + directly parallelized because the continuation values for one dense dimension will + become important for others if we implement exogenous processes. Thus, parallelize + across periods. + + If applied to a function, the decorator recognizes if the model or state space + contains dense dimensions like types or observables. Then, it splits the operation + across dense dimensions by patching the attribute access such that each sub state + space can only access its attributes. + + The decorator can be applied to functions without trailing parentheses.
At the same + time, the `*` prohibits using the decorator with positional arguments. + + """ + + def decorator_parallelize_across_dense_dimensions(func): + @functools.wraps(func) + def wrapper_parallelize_across_dense_dimensions(*args, **kwargs): + dense_indices = _infer_dense_indices_from_arguments(args, kwargs) + if dense_indices: + args_, kwargs_ = _broadcast_arguments(args, kwargs, dense_indices) + + out = joblib.Parallel(n_jobs=n_jobs)( + joblib.delayed(func)(*args_[idx], **kwargs_[idx]) + for idx in dense_indices + ) + + # Re-order multiple return values from list of tuples to tuple of lists + # to tuple of dictionaries to set as state space attributes. + if isinstance(out[0], tuple): + n_returns = len(out[0]) + tuple_of_lists = tuple( + [single_out[i] for single_out in out] for i in range(n_returns) + ) + out = tuple( + dict(zip(dense_indices, list_)) for list_ in tuple_of_lists + ) + else: + out = dict(zip(dense_indices, out)) + else: + out = func(*args, **kwargs) + + return out + + return wrapper_parallelize_across_dense_dimensions + + # Ensures that the decorator can be used without parentheses. + if callable(func): + return decorator_parallelize_across_dense_dimensions(func) + else: + return decorator_parallelize_across_dense_dimensions + + +def _infer_dense_indices_from_arguments(args, kwargs): + """Infer the dense indices from the arguments. + + This function uses the intersection of all dense indices from the arguments. Since + the simulated data or data for the likelihood might not comprise all dense + dimensions, we might need to discard some indices.
+ + """ + list_of_dense_indices = [] + for arg in args: + if _is_dictionary_with_tuple_keys(arg): + list_of_dense_indices.append(set(arg.keys())) + for kwarg in kwargs.values(): + if _is_dictionary_with_tuple_keys(kwarg): + list_of_dense_indices.append(set(kwarg.keys())) + + intersection_of_dense_indices = ( + set.intersection(*list_of_dense_indices) if list_of_dense_indices else [] + ) + + return intersection_of_dense_indices + + +def _is_dictionary_with_tuple_keys(candidate): + """Infer whether the argument is a dictionary with tuple keys.""" + return isinstance(candidate, dict) and all( + isinstance(key, tuple) for key in candidate + ) + + +def _broadcast_arguments(args, kwargs, dense_indices): + """Broadcast arguments to dense state space dimensions.""" + args = list(args) if isinstance(args, tuple) else [args] + + # Broadcast arguments which are not captured in a dictionary with dense state space + # dimension as keys. + for i, arg in enumerate(args): + if _is_dense_dictionary_argument(arg, dense_indices): + args[i] = {idx: arg[idx] for idx in dense_indices} + else: + args[i] = {idx: arg for idx in dense_indices} + for kwarg, value in kwargs.items(): + if _is_dense_dictionary_argument(value, dense_indices): + kwargs[kwarg] = {idx: kwargs[kwarg][idx] for idx in dense_indices} + else: + kwargs[kwarg] = {idx: value for idx in dense_indices} + + # Re-order arguments for zipping. + args = {idx: [arg[idx] for arg in args] for idx in dense_indices} + kwargs = { + idx: {kwarg: value[idx] for kwarg, value in kwargs.items()} + for idx in dense_indices + } + + return args, kwargs + + +def _is_dense_dictionary_argument(argument, dense_indices): + """Check whether all keys of the dictionary argument are also dense indices. + + We cannot check whether all dense indices are in the argument because `splitted_df` + in :func:`distribute_and_combine_simulation` may not cover all dense combinations. 
+ + """ + return isinstance(argument, dict) and all(idx in argument for idx in dense_indices) + + +def distribute_and_combine_simulation(func): + """Distribute the simulation across sub state spaces and combine.""" + + @functools.wraps(func) + def wrapper_distribute_and_combine_simulation(df, *args, optim_paras, **kwargs): + dense_columns = create_dense_state_space_columns(optim_paras) + splitted_df = _split_dataframe(df, dense_columns) if dense_columns else df + + out = func(splitted_df, *args, optim_paras, **kwargs) + + df = pd.concat(out.values()).sort_index() if isinstance(out, dict) else out + + return df + + return wrapper_distribute_and_combine_simulation + + +def distribute_and_combine_likelihood(func): + """Distribute the likelihood calculation across sub state spaces and combine.""" + + @functools.wraps(func) + def wrapper_distribute_and_combine_likelihood( + df, base_draws_est, *args, optim_paras, options + ): + dense_columns = create_dense_state_space_columns(optim_paras) + # Duplicate the DataFrame for each type. + if dense_columns: + n_obs = df.shape[0] + n_types = optim_paras["n_types"] + # Number each state to split the shocks later. This is necessary to keep the + # regression tests from failing. + df["__id"] = np.arange(n_obs) + # Each row of indices corresponds to a state whereas the columns refer to + # different types. 
+ indices = np.arange(n_obs * n_types).reshape(n_obs, n_types) + splitted_df = {} + for i in range(optim_paras["n_types"]): + df_ = df.copy().assign(type=i) + type_specific_dense = _split_dataframe(df_, dense_columns) + splitted_df = {**splitted_df, **type_specific_dense} + splitted_shocks = _split_shocks( + base_draws_est, splitted_df, indices, optim_paras + ) + else: + splitted_df = df + splitted_shocks = base_draws_est + + out = func(splitted_df, splitted_shocks, *args, optim_paras, options) + + out = pd.concat(out.values()).sort_index() if isinstance(out, dict) else out + + return out + + return wrapper_distribute_and_combine_likelihood + + +def _split_dataframe(df, dense_columns): + """Split a DataFrame by creating groups of the same values for the dense dims.""" + groups = {} + for name, group in df.groupby(dense_columns): + name = (int(name),) if len(dense_columns) == 1 else tuple(int(i) for i in name) + groups[name] = group + + return groups + + +def _split_shocks(base_draws_est, splitted_df, indices, optim_paras): + """Split the shocks. + + Previously, shocks were assigned to observations which were ordered like observation + * n_types. Due to the changes to the dense dimensions, this might not be true + anymore. Thus, ensure the former ordering with the `__id` variable. This will be + removed with new regression tests. 
+ + """ + splitted_shocks = {} + for dense_idx, sub_df in splitted_df.items(): + type_ = dense_idx[-1] if optim_paras["n_types"] >= 2 else 0 + sub_indices = sub_df.pop("__id").to_numpy() + shock_indices_for_group = indices[sub_indices][:, type_].reshape(-1) + splitted_shocks[dense_idx] = base_draws_est[shock_indices_for_group] + + return splitted_shocks diff --git a/respy/pre_processing/data_checking.py b/respy/pre_processing/data_checking.py index 3502f3bf5..02d573109 100644 --- a/respy/pre_processing/data_checking.py +++ b/respy/pre_processing/data_checking.py @@ -66,7 +66,14 @@ def check_estimation_data(df, optim_paras): assert df.drop(columns="Wage").notna().all().all() # We check individual state variables against the recorded choices. - df.groupby("Identifier").apply(_check_state_variables, optim_paras=optim_paras) + for choice in optim_paras["choices_w_exp"]: + df["__exp"] = df[f"Experience_{choice.title()}"] + df["Choice"].eq(choice) + shifted_exp = ( + df.groupby("Identifier")["__exp"].transform("shift").dropna().astype(int) + ) + assert shifted_exp.eq( + df.loc[shifted_exp.index, f"Experience_{choice.title()}"] + ).all() # Check that there are no duplicated observations for any period by agent. assert ~df.duplicated(subset=["Identifier", "Period"]).any() @@ -77,24 +84,6 @@ def check_estimation_data(df, optim_paras): assert (max_periods_per_ind == n_obs_per_ind).all() -def _check_state_variables(agent, optim_paras): - """Check that state variables in the dataset. - - Construct the experience and schooling levels implied by the reported - choices and compare them to the information provided in the dataset. 
- - """ - experiences = agent.iloc[0].filter(like="Experience_").to_numpy() - - for _, row in agent.iterrows(): - - assert (experiences == row.filter(like="Experience_").to_numpy()).all() - - if row.Choice in optim_paras["choices_w_exp"]: - index_of_choice = optim_paras["choices_w_exp"].index(row.Choice) - experiences[index_of_choice] += 1 - - def check_simulated_data(optim_paras, df): """Check simulated data. diff --git a/respy/pre_processing/model_checking.py b/respy/pre_processing/model_checking.py index 2079136da..a32f21fe1 100644 --- a/respy/pre_processing/model_checking.py +++ b/respy/pre_processing/model_checking.py @@ -1,34 +1,33 @@ """Everything related to validate the model.""" import numpy as np -import pandas as pd def validate_options(o): """Validate the options provided by the user.""" + assert _is_positive_nonzero_integer(o["n_periods"]) + for option, value in o.items(): if "draws" in option: assert _is_positive_nonzero_integer(value) elif option.endswith("_seed"): assert _is_nonnegative_integer(value) - else: - pass assert 0 < o["estimation_tau"] - assert ( _is_positive_nonzero_integer(o["interpolation_points"]) or o["interpolation_points"] == -1 ) - - # Number of periods. - assert _is_positive_nonzero_integer(o["n_periods"]) - - # Covariates. 
- if "covariates" in o: - assert all( - isinstance(key, str) and isinstance(val, str) - for key, val in o["covariates"].items() - ) + assert _is_positive_nonzero_integer(o["simulation_agents"]) + assert isinstance(o["core_state_space_filters"], list) and all( + isinstance(filter_, str) for filter_ in o["core_state_space_filters"] + ) + assert isinstance(o["inadmissible_states"], dict) and all( + isinstance(key, str) + and isinstance(val, list) + and all(isinstance(condition, str) for condition in val) + for key, val in o["inadmissible_states"].items() + ) + assert o["monte_carlo_sequence"] in ["random", "halton", "sobol"] def validate_params(params, optim_paras): @@ -87,48 +86,65 @@ def check_model_solution(optim_paras, options, state_space): [max(choices[choice]["start"]) for choice in optim_paras["choices_w_exp"]] ) n_periods = options["n_periods"] - n_choices_w_exp = len(optim_paras["choices_w_exp"]) # Check period. - assert np.all(np.isin(state_space.states[:, 0], range(n_periods))) + assert np.all(np.isin(state_space.core.period, range(n_periods))) # The sum of years of experiences cannot be larger than constraint time. assert np.all( - state_space.states[:, 1 : n_choices_w_exp + 1].sum(axis=1) - <= (state_space.states[:, 0] + max_initial_experience.sum()) + state_space.core[[f"exp_{c}" for c in optim_paras["choices_w_exp"]]].sum(axis=1) + <= (state_space.core.period + max_initial_experience.sum()) ) # Choice experience cannot exceed the time frame. for choice in optim_paras["choices_w_exp"]: - idx = list(choices).index(choice) + 1 - assert np.all(state_space.states[:, idx] <= choices[choice]["max"]) + assert state_space.core[f"exp_{choice}"].le(choices[choice]["max"]).all() # Lagged choices are always in ``range(n_choices)``. 
if optim_paras["n_lagged_choices"]: - assert np.isin( - state_space.states[ - :, - n_choices_w_exp - + 1 : n_choices_w_exp - + optim_paras["n_lagged_choices"] - + 1, - ], - range(len(choices)), - ).all() - - # States and covariates have finite and nonnegative values. - assert np.all(state_space.states >= 0) - assert np.all(np.isfinite(state_space.states)) + assert np.all( + state_space.core.filter(regex=r"\blagged_choice_[0-9]*\b").isin( + range(len(choices)) + ) + ) + + assert np.all(np.isfinite(state_space.core)) # Check for duplicate rows in each period. We only have possible duplicates if there # are multiple initial conditions. - assert not pd.DataFrame(state_space.states).duplicated().any() + assert not state_space.core.duplicated().any() # Check that we have as many indices as states. n_valid_indices = sum((indexer >= 0).sum() for indexer in state_space.indexer) - assert state_space.states.shape[0] == n_valid_indices + assert state_space.core.shape[0] == n_valid_indices # Check finiteness of rewards and emaxs. - assert np.all(np.isfinite(state_space.wages)) - assert np.all(np.isfinite(state_space.nonpec)) - assert np.all(np.isfinite(state_space.emax_value_functions)) + assert np.all( + _apply_to_attribute_of_state_space( + state_space.get_attribute("wages"), np.isfinite + ) + ) + assert np.all( + _apply_to_attribute_of_state_space( + state_space.get_attribute("nonpecs"), np.isfinite + ) + ) + assert np.all( + _apply_to_attribute_of_state_space( + state_space.get_attribute("expected_value_functions"), np.isfinite + ) + ) + + +def _apply_to_attribute_of_state_space(attribute, func): + """Apply a function to a state space attribute which might be dense or not. + + Attribute might be `state_space.wages` which can be a dictionary or a Numpy array. 
+ + """ + if isinstance(attribute, dict): + out = [func(val) for val in attribute.values()] + else: + out = func(attribute) + + return out diff --git a/respy/pre_processing/model_processing.py b/respy/pre_processing/model_processing.py index 16f38c7aa..edc5d874c 100644 --- a/respy/pre_processing/model_processing.py +++ b/respy/pre_processing/model_processing.py @@ -19,6 +19,10 @@ from respy.config import SEED_STARTUP_ITERATION_GAP from respy.pre_processing.model_checking import validate_options from respy.pre_processing.model_checking import validate_params +from respy.pre_processing.process_covariates import remove_irrelevant_covariates +from respy.pre_processing.process_covariates import ( + separate_covariates_into_core_dense_mixed, +) from respy.shared import normalize_probabilities warnings.simplefilter("error", category=pd.errors.PerformanceWarning) @@ -35,7 +39,7 @@ def process_params_and_options(params, options): options = {**DEFAULT_OPTIONS, **options} options = _create_internal_seeds_from_user_seeds(options) - options = _identify_relevant_covariates(options, params) + options = remove_irrelevant_covariates(options, params) validate_options(options) optim_paras = _parse_parameters(params, options) @@ -174,6 +178,12 @@ def _parse_observables(optim_paras, params): parsed_parameters = _parse_probabilities_or_logit_coefficients( params, regex_pattern ) + if len(parsed_parameters) < 2: + warnings.warn( + f"Observable '{observable}' must have at least two possible values. " + "Constant effects should be implemented via constant covariates.", + category=DeprecationWarning, + ) optim_paras["observables"][observable] = parsed_parameters return optim_paras @@ -559,46 +569,6 @@ def _parse_probabilities_or_logit_coefficients(params, regex_for_levels): return container -def _identify_relevant_covariates(options, params): - """Identify the relevant covariates. - - We try to make every model as sparse as possible which means discarding covariates - which are irrelevant. 
The immediate benefit is that memory consumption and start-up - costs are reduced. - - An advantage further downstream is that the number of lagged choices is inferred - from covariates. Eliminating irrelevant covariates might reduce the number of - implemented lags. - - """ - covariates = options["covariates"] - - relevant_covariates = {} - for cov in covariates: - if cov in params.index.get_level_values("name"): - relevant_covariates[cov] = covariates[cov] - - n_relevant_covariates_changed = True - while n_relevant_covariates_changed: - n_relevant_covariates = len(relevant_covariates) - - for cov in covariates: - for relevant_cov in list(relevant_covariates): - if cov in relevant_covariates[relevant_cov]: - # Append the covariate to the front such that nested covariates are - # created in the beginning. - relevant_covariates = {cov: covariates[cov], **relevant_covariates} - - if n_relevant_covariates == len(relevant_covariates): - n_relevant_covariates_changed = False - else: - n_relevant_covariates_changed = True - - options["covariates"] = relevant_covariates - - return options - - def _sync_optim_paras_and_options(optim_paras, options): """Sync ``optim_paras`` and ``options`` after they have been parsed separately.""" optim_paras["n_periods"] = options["n_periods"] @@ -612,6 +582,7 @@ def _sync_optim_paras_and_options(optim_paras, options): options["covariates"] = {**options["covariates"], **type_covariates} options = _convert_labels_in_formulas_to_codes(options, optim_paras) + options = separate_covariates_into_core_dense_mixed(options, optim_paras) return optim_paras, options diff --git a/respy/pre_processing/process_covariates.py b/respy/pre_processing/process_covariates.py new file mode 100644 index 000000000..426ecbeed --- /dev/null +++ b/respy/pre_processing/process_covariates.py @@ -0,0 +1,167 @@ +"""This module comprises all functions which process the definition of covariates.""" +import copy + + +def remove_irrelevant_covariates(options, params): + 
"""Identify the relevant covariates. + + We try to make every model as sparse as possible which means discarding covariates + which are irrelevant. The immediate benefit is that memory consumption and start-up + costs are reduced. + + An advantage further downstream is that the number of lagged choices is inferred + from covariates. Eliminating irrelevant covariates might reduce the number of + implemented lags. + + The function catches all relevant "high-level" covariates by looking at the `"name"` + index in `params`. "Low-level" covariates which are relevant but not included in the + index are recursively found by checking whether covariates are used in the formula + of relevant covariates. + + See also + -------- + separate_covariates_into_core_dense_mixed + + """ + options = copy.deepcopy(options) + covariates = options["covariates"] + + # Collect initial relevant covariates from params. + relevant_covs = {} + for cov in covariates: + if cov in params.index.get_level_values("name"): + relevant_covs[cov] = covariates[cov] + + # Start by iterating over initial covariates and add variables which are used to + # compute them and repeat the process. + n_relevant_covariates_changed = True + while n_relevant_covariates_changed: + n_relevant_covariates = len(relevant_covs) + + for cov in covariates: + for relevant_cov in relevant_covs: + if cov in relevant_covs[relevant_cov]: + # Append the covariate to the front such that nested covariates are + # created in the beginning. + relevant_covs = {cov: covariates[cov], **relevant_covs} + + n_relevant_covariates_changed = n_relevant_covariates != len(relevant_covs) + + options["covariates"] = relevant_covs + + return options + + +def separate_covariates_into_core_dense_mixed(options, optim_paras): + """Separate covariates into distinct groups. + + Covariates are separated into three groups. + + 1. Covariates which use only information from the core state space. + 2. 
Covariates which use only information from the dense state space. + 3. Covariates which use information from the core and the dense state space. + + Parameters + ---------- + options : dict + Contains among other information covariates and their formulas. + optim_paras : dict + Contains information to separate the core and dense state space. + + Returns + ------- + options : dict + Contains three new covariate categories. + + """ + options = copy.deepcopy(options) + covariates = options["covariates"] + + # Define two sets with default covariates for the core and dense state space. + core_covs = set( + ["period"] + + [f"exp_{choice}" for choice in optim_paras["choices_w_exp"]] + + [f"lagged_choice_{i}" for i in range(1, optim_paras["n_lagged_choices"] + 1)] + ) + dense_covs = set(optim_paras["observables"]) + if optim_paras["n_types"] >= 2: + dense_covs |= set( + ["type"] + [f"type_{i}" for i in range(2, optim_paras["n_types"] + 1)] + ) + + detailed_covariates = { + cov: {"formula": covariates[cov], "depends_on": set()} for cov in covariates + } + + # Loop over all covariates and add them to the sets if the formula contains + # covariates from the sets. If both lengths of the sets do not change anymore, stop. 
+ n_core_covs_changed = True + n_dense_covs_changed = True + while n_core_covs_changed or n_dense_covs_changed: + n_core_covs = len(core_covs) + n_dense_covs = len(dense_covs) + + for cov, formula in covariates.items(): + matches_core = [i for i in core_covs if i in formula] + if matches_core: + core_covs.update([cov]) + + matches_dense = [i for i in dense_covs if i in formula] + if matches_dense: + dense_covs.update([cov]) + + detailed_covariates[cov]["depends_on"] |= set(matches_core + matches_dense) + + n_core_covs_changed = n_core_covs != len(core_covs) + n_dense_covs_changed = n_dense_covs != len(dense_covs) + + only_core_covs = core_covs - dense_covs + only_dense_covs = dense_covs - core_covs + independent_covs = set(covariates) - core_covs - dense_covs + + options["covariates_core"] = { + cov: detailed_covariates[cov] + for cov in only_core_covs | independent_covs + if cov in detailed_covariates + } + options["covariates_dense"] = { + cov: detailed_covariates[cov] + for cov in only_dense_covs + if cov in detailed_covariates + } + options["covariates_mixed"] = { + cov: detailed_covariates[cov] for cov in core_covs & dense_covs + } + # We cannot overwrite `options["covariates"]`. + options["covariates_all"] = detailed_covariates + + return options + + +def identify_necessary_covariates(dependents, definitions): + """Identify covariates necessary to compute `dependents`. + + This function can be used if only a specific subset of covariates is necessary and + not all covariates. 
+ + See also + -------- + respy.likelihood._compute_x_beta_for_type_probability + + """ + dependents = {dependents} if isinstance(dependents, str) else set(dependents) + new_dependents = dependents.copy() + + while new_dependents: + deps = list(new_dependents) + new_dependents = set() + for dependent in deps: + if dependent in definitions and definitions[dependent]["depends_on"]: + dependents |= definitions[dependent]["depends_on"] + new_dependents |= definitions[dependent]["depends_on"] + else: + dependents.remove(dependent) + + covariates = {dep: definitions[dep] for dep in dependents} + + return covariates diff --git a/respy/shared.py b/respy/shared.py index 989cdc644..d5d78549f 100644 --- a/respy/shared.py +++ b/respy/shared.py @@ -4,8 +4,6 @@ import from respy itself. This is to prevent circular imports. """ -import copy - import chaospy as cp import numba as nb import numpy as np @@ -180,22 +178,6 @@ def generate_column_dtype_dict_for_estimation(optim_paras): return column_dtype_dict -@nb.njit -def clip(x, minimum=None, maximum=None): - """Clip input array at minimum and maximum.""" - out = np.empty_like(x) - - for index, value in np.ndenumerate(x): - if minimum is not None and value < minimum: - out[index] = minimum - elif maximum is not None and value > maximum: - out[index] = maximum - else: - out[index] = value - - return out - - def downcast_to_smallest_dtype(series, downcast_options=None): """Downcast the dtype of a :class:`pandas.Series` to the lowest possible dtype. @@ -248,19 +230,38 @@ def downcast_to_smallest_dtype(series, downcast_options=None): return out -def create_base_covariates(states, covariates_spec, raise_errors=True): - """Create set of covariates for each state. 
+def cast_bool_to_numeric(df): + """Cast columns with boolean data type to the smallest integer.""" + bool_columns = df.columns[df.dtypes == np.bool] + for column in bool_columns: + df[column] = df[column].astype(np.uint8) + return df + + +def compute_covariates(df, definitions, check_nans=False, raise_errors=True): + """Compute covariates. + + The function iterates over the definitions of covariates and tries to compute them. + It keeps track of how many covariates still need to be computed and stops if the + number does not change anymore. This might be due to missing information. Parameters ---------- - states : pandas.DataFrame - DataFrame with some, not all state space dimensions like period, experiences. - covariates_spec : dict + df : pandas.DataFrame + DataFrame with some, maybe not all state space dimensions like period, + experiences. + definitions : dict Keys represent covariates and values are strings passed to ``df.eval``. + check_nans : bool, default False + Perform a check whether the variables used to compute the selected covariate do + not contain any `np.nan`. This is necessary in + :func:`respy.simulate._sample_characteristic` where some characteristics may + contain missings. raise_errors : bool, default True - Whether to raise errors if a variable was not found. This option is necessary - for, e.g., :func:`~respy.simulate._sample_characteristic` where not all - necessary variables exist and it is not clear how to exclude them easily. + Whether to raise errors if variables cannot be computed. This option is + necessary for, e.g., :func:`~respy.simulate._sample_characteristic` where not + all necessary variables exist and it is not easy to exclude covariates which + depend on them. Returns ------- @@ -269,39 +270,51 @@ def create_base_covariates(states, covariates_spec, raise_errors=True): Raises ------ - pd.core.computation.ops.UndefinedVariableError - If variable on the right-hand-side of the definition is not found in the data. 
+ Exception + If variables cannot be computed and ``raise_errors`` is true. """ - covariates = states.copy() - has_covariates_left_changed = True - covariates_left = list(covariates_spec) + covariates_left = list(definitions) while has_covariates_left_changed: n_covariates_left = len(covariates_left) # Create a copy of `covariates_left` to remove elements without side-effects. - for covariate in copy.copy(covariates_left): + for covariate in covariates_left.copy(): # Check if the covariate does not exist and needs to be computed. - is_covariate_missing = covariate not in covariates.columns - - if is_covariate_missing: - try: - covariates[covariate] = covariates.eval(covariates_spec[covariate]) - except pd.core.computation.ops.UndefinedVariableError: - pass + is_covariate_missing = covariate not in df.columns + if not is_covariate_missing: + covariates_left.remove(covariate) + continue + + # Check that the dependencies are present. + index_or_columns = df.columns.union(df.index.names) + are_dependencies_present = all( + dep in index_or_columns for dep in definitions[covariate]["depends_on"] + ) + if are_dependencies_present: + # If true, perform checks for NaNs. 
+ if check_nans: + have_dependencies_no_missings = all( + df.eval(f"{dep}.notna().all()") + for dep in definitions[covariate]["depends_on"] + ) else: - covariates_left.remove(covariate) + have_dependencies_no_missings = True + else: + have_dependencies_no_missings = False + + if have_dependencies_no_missings: + df[covariate] = df.eval(definitions[covariate]["formula"]) + covariates_left.remove(covariate) has_covariates_left_changed = n_covariates_left != len(covariates_left) if covariates_left and raise_errors: raise Exception(f"Cannot compute all covariates: {covariates_left}.") - covariates = covariates.drop(columns=states.columns) - - return covariates + return df def convert_labeled_variables_to_codes(df, optim_paras): @@ -411,3 +424,93 @@ def create_state_space_columns(optim_paras): return create_core_state_space_columns( optim_paras ) + create_dense_state_space_columns(optim_paras) + + +@nb.guvectorize( + ["f8[:], f8[:], f8[:], f8[:, :], f8, b1[:], f8[:]"], + "(n_choices), (n_choices), (n_choices), (n_draws, n_choices), (), (n_choices) " + "-> ()", + nopython=True, + target="parallel", +) +def calculate_expected_value_functions( + wages, + nonpecs, + continuation_values, + draws, + delta, + is_inadmissible, + expected_value_functions, +): + r"""Calculate the expected maximum of value functions for a set of unobservables. + + The function takes an agent and calculates the utility for each of the choices, the + ex-post rewards, with multiple draws from the distribution of unobservables and adds + the discounted expected maximum utility of subsequent periods resulting from + choices. Averaging over all maximum utilities yields the expected maximum utility of + this state. + + The underlying process in this function is called `Monte Carlo integration`_. The + goal is to approximate an integral by evaluating the integrand at randomly chosen + points. In this setting, one wants to approximate the expected maximum utility of + the current state. 
+ + Note that `wages` have the same length as `nonpecs` despite that wages are only + available in some choices. Missing choices are filled with ones. In the case of a + choice with wage and without wage, flow utilities are + + .. math:: + + \text{Flow Utility} = \text{Wage} * \epsilon + \text{Non-pecuniary} + \text{Flow Utility} = 1 * \epsilon + \text{Non-pecuniary} + + Parameters + ---------- + wages : numpy.ndarray + Array with shape (n_choices,) containing wages. + nonpecs : numpy.ndarray + Array with shape (n_choices,) containing non-pecuniary rewards. + continuation_values : numpy.ndarray + Array with shape (n_choices,) containing expected maximum utility for each + choice in the subsequent period. + draws : numpy.ndarray + Array with shape (n_draws, n_choices). + delta : float + The discount factor. + is_inadmissible: numpy.ndarray + Array with shape (n_choices,) containing indicator for whether the following + state is inadmissible. + + Returns + ------- + expected_value_functions : float + Expected maximum utility of an agent. + + .. 
_Monte Carlo integration: + https://en.wikipedia.org/wiki/Monte_Carlo_integration + + """ + n_draws, n_choices = draws.shape + + expected_value_functions[0] = 0 + + for i in range(n_draws): + + max_value_functions = 0 + + for j in range(n_choices): + value_function, _ = aggregate_keane_wolpin_utility( + wages[j], + nonpecs[j], + continuation_values[j], + draws[i, j], + delta, + is_inadmissible[j], + ) + + if value_function > max_value_functions: + max_value_functions = value_function + + expected_value_functions[0] += max_value_functions + + expected_value_functions[0] /= n_draws diff --git a/respy/simulate.py b/respy/simulate.py index 9f7064e7e..e566ed6a3 100644 --- a/respy/simulate.py +++ b/respy/simulate.py @@ -6,16 +6,20 @@ import pandas as pd from scipy.special import softmax +from respy.config import INDEXER_INVALID_INDEX +from respy.parallelization import distribute_and_combine_simulation +from respy.parallelization import parallelize_across_dense_dimensions from respy.pre_processing.model_processing import process_params_and_options from respy.shared import calculate_value_functions_and_flow_utilities -from respy.shared import create_base_covariates +from respy.shared import compute_covariates from respy.shared import create_base_draws +from respy.shared import create_core_state_space_columns +from respy.shared import create_state_space_columns from respy.shared import downcast_to_smallest_dtype from respy.shared import rename_labels_from_internal from respy.shared import rename_labels_to_internal from respy.shared import transform_base_draws_with_cholesky_factor -from respy.solve import solve_with_backward_induction -from respy.state_space import StateSpace +from respy.solve import get_solve_func def get_simulate_func( @@ -63,7 +67,7 @@ def get_simulate_func( df, method, n_simulation_periods, options, optim_paras ) - state_space = StateSpace(optim_paras, options) + solve = get_solve_func(params, options) shape = (df.shape[0], len(optim_paras["choices"])) 
base_draws_sim = create_base_draws( @@ -78,14 +82,14 @@ def get_simulate_func( base_draws_sim=base_draws_sim, base_draws_wage=base_draws_wage, df=df, - state_space=state_space, + solve=solve, options=options, ) return simulate_function -def simulate(params, base_draws_sim, base_draws_wage, df, state_space, options): +def simulate(params, base_draws_sim, base_draws_wage, df, solve, options): """Perform a simulation. This function performs one of three possible simulation exercises. The type of the @@ -125,8 +129,8 @@ def simulate(params, base_draws_sim, base_draws_wage, df, state_space, options): a one-step-ahead simulation. - :class:`pandas.DataFrame` containing only first observations which triggers a n-step-ahead simulation taking the data as initial conditions. - state_space : :class:`~respy.state_space.StateSpace` - State space of the model. + solve : :func:`~respy.solve.solve` + Function which creates the solution of the model with new parameters. options : dict Contains model options. @@ -141,9 +145,7 @@ def simulate(params, base_draws_sim, base_draws_wage, df, state_space, options): optim_paras, options = process_params_and_options(params, options) - # Solve the model. - state_space.update_systematic_rewards(optim_paras) - state_space = solve_with_backward_induction(state_space, optim_paras, options) + state_space = solve(params) # Prepare simulation. n_simulation_periods = int(df.index.get_level_values("period").max() + 1) @@ -169,9 +171,21 @@ def simulate(params, base_draws_sim, base_draws_wage, df, state_space, options): # If it is a one-step-ahead simulation, we pick rows from the panel data. For # n-step-ahead simulation, `df` always contains only data of the current period. 
current_df = df.query("period == @period").copy() + wages = state_space.get_attribute_from_period("wages", period) + nonpecs = state_space.get_attribute_from_period("nonpecs", period) + continuation_values = state_space.get_continuation_values(period=period) + is_inadmissible = state_space.get_attribute_from_period( + "is_inadmissible", period + ) current_df_extended = _simulate_single_period( - current_df, state_space, optim_paras + current_df, + state_space.indexer[period], + wages, + nonpecs, + continuation_values, + is_inadmissible, + optim_paras=optim_paras, ) data.append(current_df_extended) @@ -257,38 +271,34 @@ def _extend_data_with_sampled_characteristics(df, optim_paras, options): return df -def _simulate_single_period(df, state_space, optim_paras): +@distribute_and_combine_simulation +@parallelize_across_dense_dimensions +def _simulate_single_period( + df, indexer, wages, nonpecs, continuation_values, is_inadmissible, optim_paras +): """Simulate individuals in a single period. - This function takes a set of states and simulates wages, choices and other - information. The information is stored in a NumPy array. + The function performs the following steps: - Parameter - --------- - df : pandas.DataFrame - DataFrame with shape (n_individuals_in_period, n_state_space_dims) which - contains the states of simulated individuals. - state_space : :class:`~respy.state_space.StateSpace` - State space of the model. - optim_paras : dict + - Map individuals in one period to the states in the model. + - Simulate choices and wages for those individuals. + - Store additional information in a :class:`pandas.DataFrame` and return it. """ - period = df.index.get_level_values("period").max() n_wages = len(optim_paras["choices_w_wage"]) - # Get indices which connect states in the state space and simulated agents. 
- columns = create_state_space_columns(optim_paras) - indices = state_space.indexer[period][tuple(df[col].astype(int) for col in columns)] + # Get indices which connect states in the state space and simulated agents. Subtract + # the minimum of indices (excluding invalid indices) because wages, etc. contain + # only wages in this period and normal indices select rows from all wages. + columns = create_core_state_space_columns(optim_paras) + indices = indexer[tuple(df[col].astype("int64") for col in columns)] + period_indices = indices - np.min(indexer[indexer != INDEXER_INVALID_INDEX]) try: - wages = state_space.wages[indices] - nonpecs = state_space.nonpec[indices] - # Get continuation values. Indices work on the complete state space whereas - # continuation values are period-specific. Make them period-specific. - cont_indices = indices - state_space.slices_by_periods[period].start - continuation_values = state_space.get_continuation_values(period)[cont_indices] - - is_inadmissible = state_space.is_inadmissible[indices] + wages = wages[period_indices] + nonpecs = nonpecs[period_indices] + continuation_values = continuation_values[period_indices] + is_inadmissible = is_inadmissible[period_indices] except IndexError as e: raise Exception( "Simulated individuals could not be mapped to their corresponding states in" @@ -296,11 +306,9 @@ def _simulate_single_period(df, state_space, optim_paras): "option['core_state_space_filters'] and the initial conditions." ) from e - # Select relevant subset of random draws. draws_shock = df[[f"shock_reward_{c}" for c in optim_paras["choices"]]].to_numpy() draws_wage = df[[f"meas_error_wage_{c}" for c in optim_paras["choices"]]].to_numpy() - # Get total values and ex post rewards. value_functions, flow_utilities = calculate_value_functions_and_flow_utilities( wages, nonpecs, @@ -311,9 +319,8 @@ def _simulate_single_period(df, state_space, optim_paras): ) # We need to ensure that no individual chooses an inadmissible state. 
Thus, set - # value functions to NaN. This cannot be done in - # :func:`aggregate_keane_wolpin_utility` as the interpolation requires a mild - # penalty. + # value functions to NaN. This cannot be done in `aggregate_keane_wolpin_utility` as + # the interpolation requires a mild penalty. value_functions = np.where(is_inadmissible, np.nan, value_functions) choice = np.nanargmax(value_functions, axis=1) @@ -353,8 +360,6 @@ def _sample_characteristic(states_df, options, level_dict, use_keys): Parameters ---------- - lag : int - Number of lag. states_df : pandas.DataFrame Contains the state of each individual. options : dict @@ -363,6 +368,9 @@ def _sample_characteristic(states_df, options, level_dict, use_keys): A dictionary where the keys are the values distributed according to the probability mass function. The values are a :class:`pandas.Series` with covariate names as the index and parameter values. + use_keys : bool + Identifier for whether the keys of the level dict should be used as variables + values or use numeric codes instead. For example, assign numbers to choices. Returns ------- @@ -371,10 +379,9 @@ def _sample_characteristic(states_df, options, level_dict, use_keys): """ # Generate covariates. 
- covariates_df = create_base_covariates( - states_df, options["covariates"], raise_errors=False + all_data = compute_covariates( + states_df, options["covariates_all"], check_nans=True, raise_errors=False ) - all_data = pd.concat([covariates_df, states_df], axis="columns", sort=False) for column in all_data: if all_data[column].dtype == np.bool: all_data[column] = all_data[column].astype(np.uint8) @@ -574,29 +581,6 @@ def _apply_law_of_motion(df, optim_paras): return df -def create_core_state_space_columns(optim_paras): - """Create internal column names for the core state space.""" - return [f"exp_{choice}" for choice in optim_paras["choices_w_exp"]] + [ - f"lagged_choice_{i}" for i in range(1, optim_paras["n_lagged_choices"] + 1) - ] - - -def create_dense_state_space_columns(optim_paras): - """Create internal column names for the dense state space.""" - columns = list(optim_paras["observables"]) - if optim_paras["n_types"] >= 2: - columns += ["type"] - - return columns - - -def create_state_space_columns(optim_paras): - """Create names of state space dimensions excluding the period and identifier.""" - return create_core_state_space_columns( - optim_paras - ) + create_dense_state_space_columns(optim_paras) - - def _harmonize_simulation_arguments(method, df, n_sim_p, options): """Harmonize the arguments of the simulation.""" if method == "n_step_ahead_with_sampling": @@ -615,15 +599,15 @@ def _harmonize_simulation_arguments(method, df, n_sim_p, options): options["n_periods"] = n_sim_p warnings.warn( f"The number of periods in the model, {options['n_periods']}, is lower than" - f" the requested number of simulated periods, {n_sim_p}. Set " - "model periods equal to simulated periods." + f" the requested number of simulated periods, {n_sim_p}. Set model periods " + "equal to simulated periods." 
) return n_sim_p, options def _process_input_df_for_simulation(df, method, n_sim_periods, options, optim_paras): - """Process the ``df`` provided by the user for the simulation.""" + """Process a :class:`pandas.DataFrame` provided by the user for the simulation.""" if method == "n_step_ahead_with_sampling": ids = np.arange(options["simulation_agents"]) index = pd.MultiIndex.from_product( diff --git a/respy/solve.py b/respy/solve.py index 35f2eb07c..466a54a2f 100644 --- a/respy/solve.py +++ b/respy/solve.py @@ -1,24 +1,22 @@ """Everything related to the solution of a structural model.""" -import warnings +import functools -import numba as nb import numpy as np -from respy.config import MAX_LOG_FLOAT +from respy.interpolate import interpolate +from respy.parallelization import parallelize_across_dense_dimensions from respy.pre_processing.model_processing import process_params_and_options -from respy.shared import aggregate_keane_wolpin_utility -from respy.shared import calculate_value_functions_and_flow_utilities -from respy.shared import clip +from respy.shared import calculate_expected_value_functions from respy.shared import transform_base_draws_with_cholesky_factor -from respy.state_space import StateSpace +from respy.state_space import create_state_space_class -def solve(params, options): - """Solve the model. +def get_solve_func(params, options): + """Get the solve function. This function takes a model specification and returns the state space of the model along with components of the solution such as covariates, non-pecuniary rewards, - wages, continuation values and value functions as attributes of the class. + wages, continuation values and expected value functions as attributes of the class. Parameters ---------- @@ -29,19 +27,76 @@ def solve(params, options): Returns ------- - state_space : :class:`~respy.state_space.StateSpace` - State space of the model which is already solved via backward-induction. 
+ solve : :func:`~respy.solve.solve` + Function with partialed arguments. """ optim_paras, options = process_params_and_options(params, options) - state_space = StateSpace(optim_paras, options) - state_space = solve_with_backward_induction(state_space, optim_paras, options) + state_space = create_state_space_class(optim_paras, options) + solve_function = functools.partial(solve, options=options, state_space=state_space) + + return solve_function + + +def solve(params, options, state_space): + """Solve the model.""" + optim_paras, options = process_params_and_options(params, options) + + states = state_space.states + wages = state_space.get_attribute("wages") + nonpecs = state_space.get_attribute("nonpecs") + + wages, nonpecs = _create_choice_rewards(states, wages, nonpecs, optim_paras) + state_space.set_attribute("wages", wages) + state_space.set_attribute("nonpecs", nonpecs) + + if optim_paras["delta"] == 0: + expected_value_functions = _solve_for_myopic_individuals( + state_space.get_attribute("expected_value_functions") + ) + state_space.set_attribute("expected_value_functions", expected_value_functions) + else: + state_space = _solve_with_backward_induction(state_space, optim_paras, options) return state_space -def solve_with_backward_induction(state_space, optim_paras, options): +@parallelize_across_dense_dimensions +def _create_choice_rewards(states, wages, nonpecs, optim_paras): + """Create wage and non-pecuniary reward for each state and choice. + + Note that missing wages are filled with ones and missing non-pecuniary rewards with + zeros. This is done in :meth:`_initialize_attributes`. 
+ + """ + for i, choice in enumerate(optim_paras["choices"]): + if f"wage_{choice}" in optim_paras: + wage_columns = optim_paras[f"wage_{choice}"].index + log_wage = np.dot( + states[wage_columns].to_numpy(), + optim_paras[f"wage_{choice}"].to_numpy(), + ) + wages[:, i] = np.exp(log_wage) + + if f"nonpec_{choice}" in optim_paras: + nonpec_columns = optim_paras[f"nonpec_{choice}"].index + nonpecs[:, i] = np.dot( + states[nonpec_columns].to_numpy(), + optim_paras[f"nonpec_{choice}"].to_numpy(), + ) + + return wages, nonpecs + + +@parallelize_across_dense_dimensions +def _solve_for_myopic_individuals(expected_value_functions): + """Solve the dynamic programming problem for myopic individuals.""" + expected_value_functions[:] = 0 + return expected_value_functions + + +def _solve_with_backward_induction(state_space, optim_paras, options): """Calculate utilities with backward induction. Parameters @@ -58,427 +113,86 @@ def solve_with_backward_induction(state_space, optim_paras, options): state_space : :class:`~respy.state_space.StateSpace` """ - n_choices = len(optim_paras["choices"]) n_wages = len(optim_paras["choices_w_wage"]) n_periods = optim_paras["n_periods"] - n_states = state_space.states.shape[0] - - state_space.emax_value_functions = np.zeros(n_states) - - # For myopic agents, utility of later periods does not play a role. - if optim_paras["delta"] == 0: - return state_space - - # Unpack arguments. - delta = optim_paras["delta"] shocks_cholesky = optim_paras["shocks_cholesky"] - shocks_cov = shocks_cholesky.dot(shocks_cholesky.T) - draws_emax_risk = transform_base_draws_with_cholesky_factor( state_space.base_draws_sol, shocks_cholesky, n_wages ) for period in reversed(range(n_periods)): - # Unpack necessary attributes of the specific period. 
wages = state_space.get_attribute_from_period("wages", period) - nonpec = state_space.get_attribute_from_period("nonpec", period) + nonpecs = state_space.get_attribute_from_period("nonpecs", period) is_inadmissible = state_space.get_attribute_from_period( "is_inadmissible", period ) continuation_values = state_space.get_continuation_values(period) - - n_states_in_period = wages.shape[0] + period_draws_emax_risk = draws_emax_risk[period] # The number of interpolation points is the same for all periods. Thus, for some # periods the number of interpolation points is larger than the actual number of - # states. In that case no interpolation is needed. + # states. In this case, no interpolation is needed. + n_dense_combinations = len(getattr(state_space, "sub_state_spaces", [1])) + n_core_states = state_space.core.query("period == @period").shape[0] + n_states_in_period = n_core_states * n_dense_combinations any_interpolated = ( options["interpolation_points"] <= n_states_in_period and options["interpolation_points"] != -1 ) if any_interpolated: - # These shifts are used to determine the expected values of the working - # alternatives. These are log normal distributed and thus the draws cannot - # simply set to zero, but :math:`E(X) = \exp\{\mu + \frac{\sigma^2}{2}\}`. - shifts = np.zeros(n_choices) - n_choices_w_wage = len(optim_paras["choices_w_wage"]) - shifts[:n_choices_w_wage] = np.exp( - np.clip(np.diag(shocks_cov)[:n_choices_w_wage], 0, MAX_LOG_FLOAT) / 2 - ) - - # Get indicator for interpolation and simulation of states. The seed value - # is the base seed plus the number of the period. Thus, not interpolated - # states are held constant for each periods and not across periods. - not_interpolated = get_not_interpolated_indicator( - options["interpolation_points"], - n_states_in_period, - next(options["solution_seed_iteration"]), - ) - - # Constructing the exogenous variable for all states, including the ones - # where simulation will take place. 
All information will be used in either - # the construction of the prediction model or the prediction step. - exogenous, max_emax = calculate_exogenous_variables( - wages, nonpec, continuation_values, shifts, delta, is_inadmissible - ) - - # Constructing the dependent variables for all states at the random subset - # of points where the EMAX is actually calculated. - endogenous = calculate_endogenous_variables( + interp_points = int(options["interpolation_points"] / n_dense_combinations) + period_expected_value_functions = interpolate( wages, - nonpec, + nonpecs, continuation_values, - max_emax, - not_interpolated, - draws_emax_risk[period], - delta, is_inadmissible, + period_draws_emax_risk, + interp_points, + optim_paras, + options, ) - # Create prediction model based on the random subset of points where the - # EMAX is actually simulated and thus dependent and independent variables - # are available. For the interpolation points, the actual values are used. - emax = get_predictions(endogenous, exogenous, max_emax, not_interpolated) - else: - emax = calculate_emax_value_functions( + period_expected_value_functions = _full_solution( wages, - nonpec, + nonpecs, continuation_values, - draws_emax_risk[period], - delta, is_inadmissible, + period_draws_emax_risk, + optim_paras, ) - state_space.get_attribute_from_period("emax_value_functions", period)[:] = emax + state_space.set_attribute_from_period( + "expected_value_functions", period_expected_value_functions, period + ) return state_space -def get_not_interpolated_indicator(interpolation_points, n_states, seed): - """Get indicator for states which will be not interpolated. - - Randomness in this function is held constant for each period but not across periods. - This is done by adding the period to the seed set for the solution. - - Parameters - ---------- - interpolation_points : int - Number of states which will be interpolated. - n_states : int - Total number of states in period. 
- seed : int - Seed to set randomness. - - Returns - ------- - not_interpolated : numpy.ndarray - Array of shape (n_states,) indicating states which will not be interpolated. - - """ - np.random.seed(seed) - - indices = np.random.choice(n_states, size=interpolation_points, replace=False) - - not_interpolated = np.full(n_states, False) - not_interpolated[indices] = True - - return not_interpolated - - -def calculate_exogenous_variables(wages, nonpec, emaxs, draws, delta, is_inadmissible): - """Calculate exogenous variables for interpolation scheme. - - Parameters - ---------- - wages : numpy.ndarray - Array with shape (n_states_in_period, n_wages). - nonpec : numpy.ndarray - Array with shape (n_states_in_period, n_choices). - emaxs : numpy.ndarray - Array with shape (n_states_in_period, n_choices). - draws : numpy.ndarray - Array with shape (n_draws, n_choices). - delta : float - Discount factor. - is_inadmissible : numpy.ndarray - Array with shape (n_states_in_period,) containing an indicator for whether the - state has reached maximum education. - - Returns - ------- - exogenous : numpy.ndarray - Array with shape (n_states_in_period, n_choices * 2 + 1). - max_emax : numpy.ndarray - Array with shape (n_states_in_period,) containing maximum over all value - functions. 
- - """ - value_functions, _ = calculate_value_functions_and_flow_utilities( - wages, nonpec, emaxs, draws, delta, is_inadmissible - ) - - max_value_functions = value_functions.max(axis=1) - exogenous = max_value_functions.reshape(-1, 1) - value_functions - - exogenous = np.column_stack( - (exogenous, np.sqrt(exogenous), np.ones(exogenous.shape[0])) - ) - - return exogenous, max_value_functions - - -def calculate_endogenous_variables( +@parallelize_across_dense_dimensions +def _full_solution( wages, - nonpec, + nonpecs, continuation_values, - max_value_functions, - not_interpolated, - draws, - delta, is_inadmissible, + period_draws_emax_risk, + optim_paras, ): - """Calculate endogenous variable for all states which are not interpolated. + """Calculate the full solution of the model. - Parameters - ---------- - wages : numpy.ndarray - Array with shape (n_states_in_period, n_wages). - nonpec : numpy.ndarray - Array with shape (n_states_in_period, n_choices). - continuation_values : numpy.ndarray - Array with shape (n_states_in_period, n_choices). - max_value_functions : numpy.ndarray - Array with shape (n_states_in_period,) containing maximum over all value - functions. - not_interpolated : numpy.ndarray - Array with shape (n_states_in_period,) containing indicators for simulated - continuation_values. - draws : numpy.ndarray - Array with shape (n_draws, n_choices) containing draws. - delta : float - Discount factor. - is_inadmissible : numpy.ndarray - Array with shape (n_states_in_period,) containing an indicator for whether the - state has reached maximum education. + In contrast to approximate solution, the Monte Carlo integration is done for each + state and not only a subset. 
""" - emax_value_functions = calculate_emax_value_functions( - wages[not_interpolated], - nonpec[not_interpolated], - continuation_values[not_interpolated], - draws, - delta, - is_inadmissible[not_interpolated], + period_expected_value_functions = calculate_expected_value_functions( + wages, + nonpecs, + continuation_values, + period_draws_emax_risk, + optim_paras["delta"], + is_inadmissible, ) - endogenous = emax_value_functions - max_value_functions[not_interpolated] - - return endogenous - - -def get_predictions(endogenous, exogenous, max_value_functions, not_interpolated): - """Get predictions for the emax of interpolated states. - - Fit an OLS regression of the exogenous variables on the endogenous variables and use - the results to predict the endogenous variables for all points in state space. Then, - replace emax values for not interpolated states with true value. - Parameters - ---------- - endogenous : numpy.ndarray - Array with shape (num_simulated_states_in_period,) containing emax for states - used to interpolate the rest. - exogenous : numpy.ndarray - Array with shape (n_states_in_period, n_choices * 2 + 1) containing exogenous - variables. - max_value_functions : numpy.ndarray - Array with shape (n_states_in_period,) containing the maximum over all value - functions. - not_interpolated : numpy.ndarray - Array with shape (n_states_in_period,) containing indicator for states which - are not interpolated and used to estimate the coefficients for the - interpolation. - - """ - # Define ordinary least squares model and fit to the data. - beta = ols(endogenous, exogenous[not_interpolated]) - - # Use the model to predict EMAX for all states. As in Keane & Wolpin (1994), - # negative predictions are truncated to zero. 
- endogenous_predicted = exogenous.dot(beta) - endogenous_predicted = clip(endogenous_predicted, 0) - - # Construct predicted EMAX for all states and the - predictions = endogenous_predicted + max_value_functions - predictions[not_interpolated] = endogenous + max_value_functions[not_interpolated] - - if not np.all(np.isfinite(beta)): - warnings.warn("OLS coefficients in the interpolation are not finite.") - - return predictions - - -@nb.guvectorize( - ["f8[:], f8[:], f8[:], f8[:, :], f8, b1[:], f8[:]"], - "(n_choices), (n_choices), (n_choices), (n_draws, n_choices), (), (n_choices) " - "-> ()", - nopython=True, - target="parallel", -) -def calculate_emax_value_functions( - wages, - nonpec, - continuation_values, - draws, - delta, - is_inadmissible, - emax_value_functions, -): - r"""Calculate the expected maximum of value functions for a set of unobservables. - - The function takes an agent and calculates the utility for each of the choices, the - ex-post rewards, with multiple draws from the distribution of unobservables and adds - the discounted expected maximum utility of subsequent periods resulting from - choices. Averaging over all maximum utilities yields the expected maximum utility of - this state. - - The underlying process in this function is called `Monte Carlo integration`_. The - goal is to approximate an integral by evaluating the integrand at randomly chosen - points. In this setting, one wants to approximate the expected maximum utility of - the current state. - - Note that ``wages`` have the same length as ``nonpec`` despite that wages are only - available in some choices. Missing choices are filled with ones. In the case of a - choice with wage and without wage, flow utilities are - - .. math:: - - \text{Flow Utility} = \text{Wage} * \epsilon + \text{Non-pecuniary} - \text{Flow Utility} = 1 * \epsilon + \text{Non-pecuniary} - - - Parameters - ---------- - wages : numpy.ndarray - Array with shape (n_choices,) containing wages. 
- nonpec : numpy.ndarray - Array with shape (n_choices,) containing non-pecuniary rewards. - continuation_values : numpy.ndarray - Array with shape (n_choices,) containing expected maximum utility for each - choice in the subsequent period. - draws : numpy.ndarray - Array with shape (n_draws, n_choices). - delta : float - The discount factor. - is_inadmissible: numpy.ndarray - Array with shape (n_choices,) containing indicator for whether the following - state is inadmissible. - - Returns - ------- - emax_value_functions : float - Expected maximum utility of an agent. - - .. _Monte Carlo integration: - https://en.wikipedia.org/wiki/Monte_Carlo_integration - - """ - n_draws, n_choices = draws.shape - - emax_value_functions[0] = 0 - - for i in range(n_draws): - - max_value_functions = 0 - - for j in range(n_choices): - value_function, _ = aggregate_keane_wolpin_utility( - wages[j], - nonpec[j], - continuation_values[j], - draws[i, j], - delta, - is_inadmissible[j], - ) - - if value_function > max_value_functions: - max_value_functions = value_function - - emax_value_functions[0] += max_value_functions - - emax_value_functions[0] /= n_draws - - -@nb.njit -def ols(y, x): - """Calculate OLS coefficients using a pseudo-inverse. - - Parameters - ---------- - x : numpy.ndarray - n x n matrix of independent variables. - y : numpy.ndarray - n x 1 matrix with dependent variable. - - Returns - ------- - beta : numpy.ndarray - n x 1 array of estimated parameter vector - - """ - beta = np.dot(np.linalg.pinv(x.T.dot(x)), x.T.dot(y)) - return beta - - -def mse(x1, x2, axis=0): - """Calculate mean squared error. - - If ``x1`` and ``x2`` have different shapes, then they need to broadcast. This uses - :func:`numpy.asanyarray` to convert the input. Whether this is the desired result or - not depends on the array subclass, for example NumPy matrices will silently - produce an incorrect result. 
- - Parameters - ---------- - x1, x2 : array_like - The performance measure depends on the difference between these two arrays. - axis : int - Axis along which the summary statistic is calculated - - Returns - ------- - mse : numpy.ndarray or float - Mean squared error along given axis. - - """ - x1 = np.asanyarray(x1) - x2 = np.asanyarray(x2) - return np.mean((x1 - x2) ** 2, axis=axis) - - -def rmse(x1, x2, axis=0): - """Calculate root mean squared error. - - If ``x1`` and ``x2`` have different shapes, then they need to broadcast. This uses - :func:`numpy.asanyarray` to convert the input. Whether this is the desired result or - not depends on the array subclass, for example NumPy matrices will silently - produce an incorrect result. - - Parameters - ---------- - x1, x2 : array_like - The performance measure depends on the difference between these two arrays. - axis : int - Axis along which the summary statistic is calculated. - - Returns - ------- - rmse : numpy.ndarray or float - Root mean squared error along given axis. 
- - """ - x1 = np.asanyarray(x1) - x2 = np.asanyarray(x2) - return np.sqrt(mse(x1, x2, axis=axis)) + return period_expected_value_functions diff --git a/respy/state_space.py b/respy/state_space.py index f1efbc90a..7d9a2c21e 100644 --- a/respy/state_space.py +++ b/respy/state_space.py @@ -8,18 +8,142 @@ from respy._numba import array_to_tuple from respy.config import INDEXER_DTYPE from respy.config import INDEXER_INVALID_INDEX -from respy.config import MAX_LOG_FLOAT -from respy.config import MIN_LOG_FLOAT -from respy.shared import create_base_covariates +from respy.shared import cast_bool_to_numeric +from respy.shared import compute_covariates from respy.shared import create_base_draws +from respy.shared import create_core_state_space_columns +from respy.shared import create_dense_state_space_columns from respy.shared import downcast_to_smallest_dtype -class StateSpace: +def create_state_space_class(optim_paras, options): + """Create the state space of the model.""" + core, indexer = _create_core_and_indexer(optim_paras, options) + dense_grid = _create_dense_state_space_grid(optim_paras) + + # Downcast after calculations or be aware of silent integer overflows. + core = compute_covariates(core, options["covariates_core"]) + core = core.apply(downcast_to_smallest_dtype) + dense = _create_dense_state_space_covariates(dense_grid, optim_paras, options) + + base_draws_sol = create_base_draws( + (options["n_periods"], options["solution_draws"], len(optim_paras["choices"])), + next(options["solution_seed_startup"]), + options["monte_carlo_sequence"], + ) + + if dense: + state_space = _MultiDimStateSpace( + core, indexer, base_draws_sol, optim_paras, options, dense + ) + else: + state_space = _SingleDimStateSpace( + core, indexer, base_draws_sol, optim_paras, options + ) + + return state_space + + +class _BaseStateSpace: + """The base class of a state space. 
+ + The base class includes some methods which should be available to both state spaces + and are shared between multiple sub state spaces. + + """ + + def _create_slices_by_core_periods(self): + """Create slices to index all attributes in a given period. + + It is important that the returned objects are not fancy indices. Fancy indexing + results in copies of array which decrease performance and raise memory usage. + + """ + period = self.core.period + indices = np.where(period - period.shift(1).fillna(-1) == 1)[0] + indices = np.append(indices, self.core.shape[0]) + + slices = [slice(indices[i], indices[i + 1]) for i in range(len(indices) - 1)] + + return slices + + def _create_is_inadmissible(self, optim_paras, options): + core = self.core.copy() + # Apply the maximum experience as a default constraint only if its not the last + # period. Otherwise, it is unconstrained. + for choice in optim_paras["choices_w_exp"]: + max_exp = optim_paras["choices"][choice]["max"] + formula = ( + f"exp_{choice} == {max_exp}" + if max_exp != optim_paras["n_periods"] - 1 + else "False" + ) + core[choice] = core.eval(formula) + + # Apply no constraint for choices without experience. + for choice in optim_paras["choices_wo_exp"]: + core[choice] = core.eval("False") + + # Apply user-defined constraints + for choice in optim_paras["choices"]: + for formula in options["inadmissible_states"].get(choice, []): + core[choice] |= core.eval(formula) + + is_inadmissible = core[optim_paras["choices"]].to_numpy(dtype=np.bool) + + return is_inadmissible + + def _create_indices_of_child_states(self, optim_paras): + """For each parent state get the indices of child states. + + During the backward induction, the ``expected_value_functions`` in the future + period serve as the ``continuation_values`` of the current period. As the + indices for child states never change, these indices can be precomputed and + added to the state_space. 
+ + Actually, the indices of the child states do not have to cover the last period, + but it makes the code prettier and reduces the need to expand the indices in the + estimation. + + """ + n_choices = len(optim_paras["choices"]) + n_choices_w_exp = len(optim_paras["choices_w_exp"]) + n_periods = optim_paras["n_periods"] + n_states = self.core.shape[0] + core_columns = create_core_state_space_columns(optim_paras) + + indices = np.full( + (n_states, n_choices), INDEXER_INVALID_INDEX, dtype=INDEXER_DTYPE + ) + + # Skip the last period which does not have child states. + for period in reversed(range(n_periods - 1)): + states_in_period = self.core.query("period == @period")[ + core_columns + ].to_numpy(dtype=np.int8) + + indices = _insert_indices_of_child_states( + indices, + states_in_period, + self.indexer[period], + self.indexer[period + 1], + self.is_inadmissible, + n_choices_w_exp, + optim_paras["n_lagged_choices"], + ) + + return indices + + +class _SingleDimStateSpace(_BaseStateSpace): """The state space of a discrete choice dynamic programming model. Parameters ---------- + core : pandas.DataFrame + DataFrame containing the core state space. + indexer : numpy.ndarray + Multidimensional array containing indices of states in valid positions. optim_paras : dict Dictionary containing model parameters. options : dict @@ -44,110 +168,75 @@ class StateSpace: Array with shape (n_states, n_choices + 3) containing containing the emax of each choice of the subsequent period and the simulated or interpolated maximum of the current period. - emax_value_functions : numpy.ndarray + expected_value_functions : numpy.ndarray Array with shape (n_states, 1) containing the expected maximum of choice-specific value functions. 
""" - def __init__(self, optim_paras, options): - """Initialize the state space class.""" - self.base_draws_sol = create_base_draws( - ( - options["n_periods"], - options["solution_draws"], - len(optim_paras["choices"]), - ), - next(options["solution_seed_startup"]), - options["monte_carlo_sequence"], + def __init__( + self, + core, + indexer, + base_draws_sol, + optim_paras, + options, + dense_dim=None, + dense_covariates=None, + is_inadmissible=None, + indices_of_child_states=None, + slices_by_periods=None, + ): + self.dense_dim = dense_dim + self.core = core + self.indexer = indexer + self.dense_covariates = dense_covariates if dense_covariates is not None else {} + self.mixed_covariates = options["covariates_mixed"] + self.base_draws_sol = base_draws_sol + self.slices_by_periods = ( + super()._create_slices_by_core_periods() + if slices_by_periods is None + else slices_by_periods ) - - states_df, self.indexer = _create_state_space(optim_paras, options) - - base_covariates_df = create_base_covariates(states_df, options["covariates"]) - - # Downcast after calculations or be aware of silent integer overflows. 
- states_df = states_df.apply(downcast_to_smallest_dtype) - base_covariates_df = base_covariates_df.apply(downcast_to_smallest_dtype) - self.states = states_df.to_numpy() - - self.covariates = _create_choice_covariates( - base_covariates_df, states_df, optim_paras + self._initialize_attributes(optim_paras) + self.is_inadmissible = ( + self._create_is_inadmissible(optim_paras, options) + if is_inadmissible is None + else is_inadmissible ) - - self.wages, self.nonpec = _create_reward_components( - self.states[:, -1], self.covariates, optim_paras - ) - - self.is_inadmissible = _create_is_inadmissible_indicator( - states_df, optim_paras, options + self.indices_of_child_states = ( + super()._create_indices_of_child_states(optim_paras) + if indices_of_child_states is None + else indices_of_child_states ) - self._create_slices_by_periods(options["n_periods"]) - - self.indices_of_child_states = _get_indices_of_child_states(self, optim_paras) - - def update_systematic_rewards(self, optim_paras): - """Update wages and non-pecuniary rewards. - - During the estimation, the rewards need to be updated according to the new - parameters whereas the covariates stay the same. + def get_attribute(self, attr): + """Get an attribute of the state space.""" + return getattr(self, attr) - """ - self.wages, self.nonpec = _create_reward_components( - self.states[:, -1], self.covariates, optim_paras - ) - - def get_attribute_from_period(self, attr, period): + def get_attribute_from_period(self, attribute, period): """Get an attribute of the state space sliced to a given period. Parameters ---------- - attr : str + attribute : str Attribute name, e.g. ``"states"`` to retrieve ``self.states``. period : int Attribute is retrieved from this period. 
""" - if attr == "covariates": - raise AttributeError("Attribute covariates cannot be retrieved by periods.") - else: - pass + attr = self.get_attribute(attribute) + slice_ = self.slices_by_periods[period] + out = attr[slice_] - try: - attribute = getattr(self, attr) - except AttributeError as e: - raise AttributeError(f"StateSpace has no attribute {attr}.").with_traceback( - e.__traceback__ - ) - - try: - indices = self.slices_by_periods[period] - except IndexError as e: - raise IndexError(f"StateSpace has no period {period}.").with_traceback( - e.__traceback__ - ) - - return attribute[indices] - - def _create_slices_by_periods(self, n_periods): - """Create slices to index all attributes in a given period. - - It is important that the returned objects are not fancy indices. Fancy indexing - results in copies of array which decrease performance and raise memory usage. - - """ - self.slices_by_periods = [] - for i in range(n_periods): - idx_start, idx_end = np.where(self.states[:, 0] == i)[0][[0, -1]] - self.slices_by_periods.append(slice(idx_start, idx_end + 1)) + return out def get_continuation_values(self, period=None, indices=None): """Return the continuation values for a given period or states. If the last period is selected, return a matrix of zeros. In any other period, - use the precomputed ``indices_of_child_states`` to select continuation values - from ``emax_value_functions``. + use the precomputed `indices_of_child_states` to select continuation values from + `expected_value_functions`. You can also indices to collect continuation values across periods. 
@@ -170,12 +259,12 @@ def get_continuation_values(self, period=None, indices=None): if period == n_periods - 1: last_slice = self.slices_by_periods[-1] n_states_last_period = len(range(last_slice.start, last_slice.stop)) - n_choices = self.is_inadmissible.shape[1] + n_choices = self.get_attribute("is_inadmissible").shape[1] continuation_values = np.zeros((n_states_last_period, n_choices)) else: if indices is not None: - child_indices = self.indices_of_child_states[indices] + child_indices = self.get_attribute("indices_of_child_states")[indices] elif period is not None and 0 <= period <= n_periods - 2: child_indices = self.get_attribute_from_period( "indices_of_child_states", period @@ -186,13 +275,111 @@ def get_continuation_values(self, period=None, indices=None): mask = child_indices != INDEXER_INVALID_INDEX valid_indices = np.where(mask, child_indices, 0) continuation_values = np.where( - mask, self.emax_value_functions[valid_indices], 0 + mask, self.get_attribute("expected_value_functions")[valid_indices], 0 ) return continuation_values + def set_attribute(self, attribute, value): + self.get_attribute(attribute)[:] = value + + def set_attribute_from_period(self, attribute, value, period): + self.get_attribute_from_period(attribute, period)[:] = value + + @property + def states(self): + states = self.core.copy().assign(**self.dense_covariates) + states = compute_covariates(states, self.mixed_covariates) + states = cast_bool_to_numeric(states) + return states + + def _initialize_attributes(self, optim_paras): + """Initialize attributes to use references later.""" + n_states = self.core.shape[0] + n_choices = len(optim_paras["choices"]) + n_choices_w_wage = len(optim_paras["choices_w_wage"]) + n_choices_wo_wage = n_choices - n_choices_w_wage + + for name, array in ( + ("expected_value_functions", np.empty(n_states)), + ( + "wages", + np.column_stack( + ( + np.empty((n_states, n_choices_w_wage)), + np.ones((n_states, n_choices_wo_wage)), + ) + ), + ), + ("nonpecs", 
np.zeros((n_states, n_choices))), + ): + setattr(self, name, array) + + +class _MultiDimStateSpace(_BaseStateSpace): + """The state space of a discrete choice dynamic programming model. + + This class wraps the whole state space of the model. + + """ + + def __init__(self, core, indexer, base_draws_sol, optim_paras, options, dense): + self.base_draws_sol = base_draws_sol + self.core = core + self.indexer = indexer + self.is_inadmissible = super()._create_is_inadmissible(optim_paras, options) + self.indices_of_child_states = super()._create_indices_of_child_states( + optim_paras + ) + self.slices_by_periods = super()._create_slices_by_core_periods() + self.sub_state_spaces = { + dense_dim: _SingleDimStateSpace( + self.core, + self.indexer, + self.base_draws_sol, + optim_paras, + options, + dense_dim, + dense_covariates, + self.is_inadmissible, + self.indices_of_child_states, + self.slices_by_periods, + ) + for dense_dim, dense_covariates in dense.items() + } + + def get_attribute(self, attribute): + return { + key: sss.get_attribute(attribute) + for key, sss in self.sub_state_spaces.items() + } + + def get_attribute_from_period(self, attribute, period): + return { + key: sss.get_attribute_from_period(attribute, period) + for key, sss in self.sub_state_spaces.items() + } + + def get_continuation_values(self, period=None, indices=None): + return { + key: sss.get_continuation_values(period, indices) + for key, sss in self.sub_state_spaces.items() + } + + def set_attribute(self, attribute, value): + for key, sss in self.sub_state_spaces.items(): + sss.set_attribute(attribute, value[key]) -def _create_state_space(optim_paras, options): + def set_attribute_from_period(self, attribute, value, period): + for key, sss in self.sub_state_spaces.items(): + sss.set_attribute_from_period(attribute, value[key], period) + + @property + def states(self): + return {key: sss.states for key, sss in self.sub_state_spaces.items()} + + +def _create_core_and_indexer(optim_paras, options): 
"""Create the state space. The state space of the model are all feasible combinations of the period, @@ -254,26 +441,22 @@ def _create_state_space(optim_paras, options): _create_core_state_space_per_period _filter_core_state_space _add_initial_experiences_to_core_state_space - _create_state_space_indexer + _create_core_state_space_indexer """ - df = _create_core_state_space(optim_paras) + core = _create_core_state_space(optim_paras) - df = _add_lagged_choice_to_core_state_space(df, optim_paras) + core = _add_lagged_choice_to_core_state_space(core, optim_paras) - df = _filter_core_state_space(df, options) + core = _filter_core_state_space(core, options) - df = _add_initial_experiences_to_core_state_space(df, optim_paras) + core = _add_initial_experiences_to_core_state_space(core, optim_paras) - df = _add_observables_to_state_space(df, optim_paras) + core = core.sort_values("period").reset_index(drop=True) - df = _add_types_to_state_space(df, optim_paras["n_types"]) + indexer = _create_core_state_space_indexer(core, optim_paras) - df = df.sort_values("period").reset_index(drop=True) - - indexer = _create_state_space_indexer(df, optim_paras) - - return df, indexer + return core, indexer def _create_core_state_space(optim_paras): @@ -442,37 +625,18 @@ def _add_initial_experiences_to_core_state_space(df, optim_paras): return df -def _add_observables_to_state_space(df, optim_paras): +def _create_dense_state_space_grid(optim_paras): levels_of_observables = [range(len(i)) for i in optim_paras["observables"].values()] - combinations = itertools.product(*levels_of_observables) + types = [range(optim_paras["n_types"])] if optim_paras["n_types"] >= 2 else [] - container = [] - for combination in combinations: - df_ = df.copy() - df_ = df_.assign( - **{col: val for col, val in zip(optim_paras["observables"], combination)} - ) - container.append(df_) + dense_state_space_grid = list(itertools.product(*levels_of_observables, *types)) + if dense_state_space_grid == [()]: + 
dense_state_space_grid = False - df = pd.concat(container, axis="rows", sort=False) if container else df + return dense_state_space_grid - return df - -def _add_types_to_state_space(df, n_types): - if n_types >= 2: - container = [] - for i in range(n_types): - df_ = df.copy() - df_["type"] = i - container.append(df_) - - df = pd.concat(container, axis="rows", sort=False) - - return df - - -def _create_state_space_indexer(df, optim_paras): +def _create_core_state_space_indexer(df, optim_paras): """Create the indexer for the state space. The indexer consists of sub indexers for each period. This is much more @@ -500,26 +664,18 @@ def _create_state_space_indexer(df, optim_paras): shape = ( tuple(np.minimum(max_initial_experience + period, max_experience) + 1) + (n_choices,) * optim_paras["n_lagged_choices"] - + tuple(len(x) for x in optim_paras["observables"].values()) ) - if optim_paras["n_types"] >= 2: - shape += (optim_paras["n_types"],) - sub_indexer = np.full(shape, INDEXER_INVALID_INDEX, dtype=INDEXER_DTYPE) sub_df = df.query("period == @period") n_states = sub_df.shape[0] - indices = ( - tuple(sub_df[f"exp_{i}"] for i in optim_paras["choices_w_exp"]) - + tuple( - sub_df[f"lagged_choice_{i}"] - for i in range(1, optim_paras["n_lagged_choices"] + 1) - ) - + tuple(sub_df[observable] for observable in optim_paras["observables"]) + indices = tuple( + sub_df[f"exp_{i}"] for i in optim_paras["choices_w_exp"] + ) + tuple( + sub_df[f"lagged_choice_{i}"] + for i in range(1, optim_paras["n_lagged_choices"] + 1) ) - if optim_paras["n_types"] >= 2: - indices += (sub_df["type"],) sub_indexer[indices] = np.arange(count_states, count_states + n_states) indexer.append(sub_indexer) @@ -529,152 +685,6 @@ def _create_state_space_indexer(df, optim_paras): return indexer -def _create_reward_components(types, covariates, optim_paras): - """Calculate systematic rewards for each state. - - Wages are only available for some choices, i.e. n_nonpec >= n_wages. 
We extend the - array of wages with ones for the difference in dimensions, n_nonpec - n_wages. Ones - are necessary as it facilitates the aggregation of reward components in - :func:`calculate_emax_value_functions` and related functions. - - Parameters - ---------- - types : numpy.ndarray - Array with shape (n_states,) containing type information. - covariates : dict - Dictionary with covariate arrays for wage and nonpec rewards. - optim_paras : dict - Contains parameters affected by the optimization. - - """ - wage_labels = [f"wage_{choice}" for choice in optim_paras["choices_w_wage"]] - log_wages = np.column_stack( - [np.dot(covariates[w], optim_paras[w]) for w in wage_labels] - ) - - n_states = types.shape[0] - - nonpec_labels = [f"nonpec_{choice}" for choice in optim_paras["choices"]] - nonpec = np.column_stack( - [ - np.zeros(n_states) - if n not in optim_paras - else np.dot(covariates[n], optim_paras[n]) - for n in nonpec_labels - ] - ) - - wages = np.exp(np.clip(log_wages, MIN_LOG_FLOAT, MAX_LOG_FLOAT)) - - # Extend wages to dimension of non-pecuniary rewards. - additional_dim = nonpec.shape[1] - log_wages.shape[1] - wages = np.column_stack((wages, np.ones((wages.shape[0], additional_dim)))) - - return wages, nonpec - - -def _create_choice_covariates(covariates_df, states_df, optim_paras): - """Create the covariates for each choice. - - Parameters - ---------- - covariates_df : pandas.DataFrame - DataFrame with the basic covariates. - states_df : pandas.DataFrame - DataFrame with the state information. - optim_paras : dict - Dictionary of parsed parameters. - - Returns - ------- - covariates : dict - Dictionary where values are the wage or non-pecuniary covariates for choices. 
- - """ - all_data = pd.concat([covariates_df, states_df], axis="columns", sort=False) - - covariates = {} - - for choice in optim_paras["choices"]: - if f"wage_{choice}" in optim_paras: - wage_columns = optim_paras[f"wage_{choice}"].index - covariates[f"wage_{choice}"] = all_data[wage_columns].to_numpy() - - if f"nonpec_{choice}" in optim_paras: - nonpec_columns = optim_paras[f"nonpec_{choice}"].index - covariates[f"nonpec_{choice}"] = all_data[nonpec_columns].to_numpy() - - for key, val in covariates.items(): - covariates[key] = np.ascontiguousarray(val) - - return covariates - - -def _create_is_inadmissible_indicator(states, optim_paras, options): - df = states.copy() - - # Apply the maximum experience as a default constraint only if its not the last - # period. Otherwise, it is unconstrained. - for choice in optim_paras["choices_w_exp"]: - max_exp = optim_paras["choices"][choice]["max"] - formula = ( - f"exp_{choice} == {max_exp}" - if max_exp != optim_paras["n_periods"] - 1 - else "False" - ) - df[choice] = df.eval(formula) - - # Apply no constraint for choices without experience. - for choice in optim_paras["choices_wo_exp"]: - df[choice] = df.eval("False") - - # Apply user-defined constraints - for choice in optim_paras["choices"]: - for formula in options["inadmissible_states"].get(choice, []): - df[choice] |= df.eval(formula) - - is_inadmissible = df[optim_paras["choices"]].to_numpy() - - return is_inadmissible - - -def _get_indices_of_child_states(state_space, optim_paras): - """For each parent state get the indices of child states. - - During the backward induction, the ``emax_value_functions`` in the future period - serve as the ``continuation_values`` of the current period. As the indices for child - states never change, these indices can be precomputed and added to the state_space. 
- - Actually, the indices of the child states do not have to cover the last period, but - it makes the code prettier and reduces the need to expand the indices in the - estimation. - - """ - n_choices = len(optim_paras["choices"]) - n_choices_w_exp = len(optim_paras["choices_w_exp"]) - n_periods = optim_paras["n_periods"] - n_states = state_space.states.shape[0] - - indices = np.full((n_states, n_choices), INDEXER_INVALID_INDEX, dtype=INDEXER_DTYPE) - - # Skip the last period which does not have child states. - for period in reversed(range(n_periods - 1)): - - states_in_period = state_space.get_attribute_from_period("states", period) - - indices = _insert_indices_of_child_states( - indices, - states_in_period, - state_space.indexer[period], - state_space.indexer[period + 1], - state_space.is_inadmissible, - n_choices_w_exp, - optim_paras["n_lagged_choices"], - ) - - return indices - - @nb.njit def _insert_indices_of_child_states( indices, @@ -690,7 +700,7 @@ def _insert_indices_of_child_states( for i in range(states.shape[0]): - idx_current = indexer_current[array_to_tuple(indexer_current, states[i, 1:])] + idx_current = indexer_current[array_to_tuple(indexer_current, states[i])] for choice in range(n_choices): # Check if the state in the future is admissible. @@ -698,7 +708,7 @@ def _insert_indices_of_child_states( continue else: # Cut off the period which is not necessary for the indexer. - child = states[i, 1:].copy() + child = states[i].copy() # Increment experience if it is a choice with experience # accumulation. 
@@ -718,3 +728,26 @@ def _insert_indices_of_child_states( indices[idx_current, choice] = idx_future return indices + + +def _create_dense_state_space_covariates(dense_grid, optim_paras, options): + if dense_grid: + columns = create_dense_state_space_columns(optim_paras) + + df = pd.DataFrame(data=dense_grid, columns=columns).set_index( + columns, drop=False + ) + + covariates = compute_covariates(df, options["covariates_dense"]) + covariates = covariates.apply(downcast_to_smallest_dtype) + covariates = covariates.to_dict(orient="index") + + # Convert scalar keys to tuples. + for key in covariates.copy(): + if np.isscalar(key): + covariates[(key,)] = covariates.pop(key) + + else: + covariates = False + + return covariates diff --git a/respy/tests/random_model.py b/respy/tests/random_model.py index e15b4343c..c833392b9 100644 --- a/respy/tests/random_model.py +++ b/respy/tests/random_model.py @@ -138,8 +138,9 @@ def generate_random_model( observables = point_constr.pop("observables", None) if observables is None: n_observables = np.random.randint(0, 3) + # Do not sample observables with 1 level! 
observables = ( - np.random.randint(1, 4, size=n_observables) if n_observables else False + np.random.randint(2, 4, size=n_observables) if n_observables else False ) if observables is not False: @@ -148,7 +149,7 @@ def generate_random_model( observable_prob_template(observables), observable_coeffs_template(observables, params), ] - params = pd.concat(to_concat, axis=0, sort=False) + params = pd.concat(to_concat, axis="rows", sort=False) indices = ( params.index.get_level_values("category") diff --git a/respy/tests/test_interpolate.py b/respy/tests/test_interpolate.py new file mode 100644 index 000000000..4e32be89e --- /dev/null +++ b/respy/tests/test_interpolate.py @@ -0,0 +1,11 @@ +from respy.solve import get_solve_func +from respy.tests.random_model import generate_random_model + + +def test_simple_run(): + params, options = generate_random_model( + point_constr={"n_periods": 5, "interpolation_points": 10} + ) + + solve = get_solve_func(params, options) + solve(params) diff --git a/respy/tests/test_model.py b/respy/tests/test_model.py index 3b140563c..8be015f02 100644 --- a/respy/tests/test_model.py +++ b/respy/tests/test_model.py @@ -10,9 +10,9 @@ from respy.likelihood import get_crit_func from respy.pre_processing.model_checking import validate_options from respy.pre_processing.model_processing import _convert_labels_in_formulas_to_codes -from respy.pre_processing.model_processing import _identify_relevant_covariates from respy.pre_processing.model_processing import _parse_initial_and_max_experience from respy.pre_processing.model_processing import process_params_and_options +from respy.pre_processing.process_covariates import remove_irrelevant_covariates from respy.tests.random_model import generate_random_model from respy.tests.random_model import simulate_truncated_data from respy.tests.utils import process_model_or_seed @@ -159,7 +159,7 @@ def test_identify_relevant_covariates(): } } - relevant_covariates = _identify_relevant_covariates(options, params) + 
relevant_covariates = remove_irrelevant_covariates(options, params) expected = { "covariates": { diff --git a/respy/tests/test_randomness.py b/respy/tests/test_randomness.py index 13ca83e8a..ff46657e3 100644 --- a/respy/tests/test_randomness.py +++ b/respy/tests/test_randomness.py @@ -2,7 +2,10 @@ import numpy as np import pytest -import respy as rp +from respy.likelihood import get_crit_func +from respy.simulate import get_simulate_func +from respy.solve import get_solve_func +from respy.tests.utils import apply_to_attributes_of_two_state_spaces from respy.tests.utils import process_model_or_seed @@ -12,23 +15,37 @@ def test_invariance_of_model_solution_in_solve_and_criterion_functions(model): options["n_periods"] = 2 if model == "kw_2000" else 3 - state_space = rp.solve(params, options) + solve = get_solve_func(params, options) + state_space = solve(params) - simulate = rp.get_simulate_func(params, options) + simulate = get_simulate_func(params, options) df = simulate(params) - state_space_sim = simulate.keywords["state_space"] + state_space_sim = simulate.keywords["solve"].keywords["state_space"] - criterion = rp.get_crit_func(params, options, df) + criterion = get_crit_func(params, options, df) _ = criterion(params) - state_space_crit = criterion.keywords["state_space"] + state_space_crit = criterion.keywords["solve"].keywords["state_space"] for state_space_ in [state_space_sim, state_space_crit]: - np.testing.assert_array_equal(state_space.states, state_space_.states) - np.testing.assert_array_equal(state_space.wages, state_space_.wages) - np.testing.assert_array_equal(state_space.nonpec, state_space_.nonpec) - np.testing.assert_array_equal( - state_space.emax_value_functions, state_space_.emax_value_functions + assert state_space.core.equals(state_space_.core) + + apply_to_attributes_of_two_state_spaces( + state_space.get_attribute("wages"), + state_space_.get_attribute("wages"), + np.testing.assert_array_equal, + ) + apply_to_attributes_of_two_state_spaces( 
+ state_space.get_attribute("nonpecs"), + state_space_.get_attribute("nonpecs"), + np.testing.assert_array_equal, + ) + apply_to_attributes_of_two_state_spaces( + state_space.get_attribute("expected_value_functions"), + state_space_.get_attribute("expected_value_functions"), + np.testing.assert_array_equal, ) - np.testing.assert_array_equal( - state_space.base_draws_sol, state_space_.base_draws_sol + apply_to_attributes_of_two_state_spaces( + state_space.get_attribute("base_draws_sol"), + state_space_.get_attribute("base_draws_sol"), + np.testing.assert_array_equal, ) diff --git a/respy/tests/test_solve.py b/respy/tests/test_solve.py index d87247f90..58fef4285 100644 --- a/respy/tests/test_solve.py +++ b/respy/tests/test_solve.py @@ -1,18 +1,21 @@ import numpy as np +import pandas as pd import pytest -import respy as rp from respy.config import EXAMPLE_MODELS from respy.config import INDEXER_INVALID_INDEX from respy.config import KEANE_WOLPIN_1994_MODELS from respy.config import KEANE_WOLPIN_1997_MODELS from respy.pre_processing.model_checking import check_model_solution from respy.pre_processing.model_processing import process_params_and_options -from respy.state_space import _create_state_space +from respy.shared import create_core_state_space_columns +from respy.solve import get_solve_func +from respy.state_space import _create_core_and_indexer from respy.state_space import _insert_indices_of_child_states from respy.tests._former_code import _create_state_space_kw94 from respy.tests._former_code import _create_state_space_kw97_base from respy.tests._former_code import _create_state_space_kw97_extended +from respy.tests.utils import apply_to_attributes_of_two_state_spaces from respy.tests.utils import process_model_or_seed @@ -20,7 +23,8 @@ def test_check_solution(model_or_seed): params, options = process_model_or_seed(model_or_seed) - state_space = rp.solve(params, options) + solve = get_solve_func(params, options) + state_space = solve(params) optim_paras, 
options = process_params_and_options(params, options) @@ -45,23 +49,26 @@ def test_state_space_restrictions_by_traversing_forward(model): params, options = process_model_or_seed(model) optim_paras, options = process_params_and_options(params, options) - state_space = rp.solve(params, options) + solve = get_solve_func(params, options) + state_space = solve(params) indices = np.full( - (state_space.states.shape[0], len(optim_paras["choices"])), - INDEXER_INVALID_INDEX, + (state_space.core.shape[0], len(optim_paras["choices"])), INDEXER_INVALID_INDEX ) + core_columns = create_core_state_space_columns(optim_paras) for period in range(options["n_periods"] - 1): if period == 0: - states = state_space.get_attribute_from_period("states", period) - else: - indices_period = state_space.get_attribute_from_period( - "indices_of_child_states", period - 1 + states = state_space.core.query("period == 0")[core_columns].to_numpy( + np.int ) + else: + indices_period = state_space.indices_of_child_states[ + state_space.slices_by_periods[period - 1] + ] indices_period = indices_period[indices_period >= 0] - states = state_space.states[indices_period] + states = state_space.core[core_columns].to_numpy(np.int)[indices_period] indices = _insert_indices_of_child_states( indices, @@ -75,10 +82,10 @@ def test_state_space_restrictions_by_traversing_forward(model): # Take all valid indices and add the indices of the first period. 
set_valid_indices = set(indices[indices != INDEXER_INVALID_INDEX]) | set( - range(state_space.get_attribute_from_period("states", 0).shape[0]) + range(state_space.core.query("period == 0").shape[0]) ) - assert set_valid_indices == set(range(state_space.states.shape[0])) + assert set_valid_indices == set(range(state_space.core.shape[0])) @pytest.mark.parametrize("model_or_seed", EXAMPLE_MODELS) @@ -92,17 +99,32 @@ def test_invariance_of_solution(model_or_seed): optim_paras, options = process_params_and_options(params, options) - state_space = rp.solve(params, options) - state_space_ = rp.solve(params, options) + solve = get_solve_func(params, options) + state_space = solve(params) + state_space_ = solve(params) - np.testing.assert_array_equal(state_space.states, state_space_.states) - np.testing.assert_array_equal(state_space.wages, state_space_.wages) - np.testing.assert_array_equal(state_space.nonpec, state_space_.nonpec) - np.testing.assert_array_equal( - state_space.emax_value_functions, state_space_.emax_value_functions + apply_to_attributes_of_two_state_spaces( + state_space.core, state_space_.core, np.testing.assert_array_equal + ) + apply_to_attributes_of_two_state_spaces( + state_space.get_attribute("wages"), + state_space_.get_attribute("wages"), + np.testing.assert_array_equal, ) - np.testing.assert_array_equal( - state_space.base_draws_sol, state_space_.base_draws_sol + apply_to_attributes_of_two_state_spaces( + state_space.get_attribute("nonpecs"), + state_space_.get_attribute("nonpecs"), + np.testing.assert_array_equal, + ) + apply_to_attributes_of_two_state_spaces( + state_space.get_attribute("expected_value_functions"), + state_space_.get_attribute("expected_value_functions"), + np.testing.assert_array_equal, + ) + apply_to_attributes_of_two_state_spaces( + state_space.get_attribute("base_draws_sol"), + state_space_.get_attribute("base_draws_sol"), + np.testing.assert_array_equal, ) @@ -123,13 +145,13 @@ def 
test_create_state_space_vs_specialized_kw94(model): states_old, indexer_old = _create_state_space_kw94( n_periods, n_types, edu_starts, edu_max ) - if n_types < 2: + if n_types == 1: states_old = states_old[:, :-1] for i, idx in enumerate(indexer_old): shape = idx.shape indexer_old[i] = idx.reshape(shape[:-2] + (-1,)) - states_new, indexer_new = _create_state_space(optim_paras, options) + states_new, indexer_new = _create_core_and_indexer(optim_paras, options) # Compare the state spaces via sets as ordering changed in some cases. states_old_set = set(map(tuple, states_old)) @@ -167,13 +189,14 @@ def test_create_state_space_vs_specialized_kw97(model): states_old, indexer_old = _create_state_space_kw97_extended( n_periods, n_types, edu_starts, edu_max ) - if n_types < 2: + if n_types == 1: states_old = states_old[:, :-1] for i, idx in enumerate(indexer_old): shape = idx.shape indexer_old[i] = idx.reshape(shape[:-2] + (-1,)) - states_new, indexer_new = _create_state_space(optim_paras, options) + states_new, indexer_new = _create_core_and_indexer(optim_paras, options) + states_new = pd.concat([states_new.copy().assign(type=i) for i in range(4)]) # Compare the state spaces via sets as ordering changed in some cases. 
states_old_set = set(map(tuple, states_old)) @@ -184,4 +207,5 @@ def test_create_state_space_vs_specialized_kw97(model): for period in range(n_periods): mask_old = indexer_old[period] != INDEXER_INVALID_INDEX mask_new = indexer_new[period] != INDEXER_INVALID_INDEX - assert np.array_equal(mask_old, mask_new) + adj_mask_new = np.repeat(mask_new, 4).reshape(mask_old.shape) + assert np.array_equal(mask_old, adj_mask_new) diff --git a/respy/tests/utils.py b/respy/tests/utils.py index a1195ec0d..56606b2f1 100644 --- a/respy/tests/utils.py +++ b/respy/tests/utils.py @@ -21,3 +21,17 @@ def process_model_or_seed(model_or_seed=None, **kwargs): options["n_periods"] = 3 return params, options + + +def apply_to_attributes_of_two_state_spaces(attr_1, attr_2, func): + """Apply a function to two state space attributes, dense or not. + + Attributes might be `state_space.wages` which can be a dictionary or a Numpy array. + + """ + if isinstance(attr_1, dict): + out = {key: func(attr_1[key], attr_2[key]) for key in attr_1} + else: + out = func(attr_1, attr_2) + + return out diff --git a/setup.py b/setup.py index eb6157042..47e32e7cf 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,7 @@ "respy is a Python package for the simulation and estimation of a prototypical " "finite-horizon dynamic discrete choice model." 
) +EMAIL = "respy.9b46528f81292a712fa4855ff362f40f.show-sender@streams.zulipchat.com" README = Path("README.rst").read_text() PROJECT_URLS = { "Bug Tracker": "https://github.com/OpenSourceEconomics/respy/issues", @@ -23,8 +24,8 @@ description=DESCRIPTION, long_description=DESCRIPTION + "\n\n" + README, long_description_content_type="text/x-rst", - author="Philipp Eisenhauer", - author_email="eisenhauer@policy-lab.org", + author="The respy Development Team", + author_email=EMAIL, python_requires=">=3.6.0", url="https://respy.readthedocs.io/en/latest/", project_urls=PROJECT_URLS, @@ -45,7 +46,7 @@ "tests/resources/*.pickle", "tests/resources/*.yaml", "tox.ini", - ], + ] }, include_package_data=True, zip_safe=False, diff --git a/tox.ini b/tox.ini index 0937ba432..3784bd96f 100644 --- a/tox.ini +++ b/tox.ini @@ -15,6 +15,7 @@ conda_deps = codecov conda-build estimagic >= 0.0.14 + joblib mkl numba numpy @@ -78,6 +79,7 @@ per-file-ignores = respy/tests/*:D development/*:D respy/pre_processing/specification_helpers.py:D + respy/parallelization.py:D202,E800 [pytest] junit_family = xunit2 @@ -96,3 +98,4 @@ filterwarnings = ignore: the imp module is deprecated ignore: The probabilities for parameter group ignore: The distribution of initial lagged choices is insufficiently specified + ignore: Observable 'observable_