Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move logic for reading JSON parameter revisions to Parameter class #2312

Merged
merged 6 commits into from
May 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 26 additions & 175 deletions taxcalc/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@
#
# pylint: disable=invalid-name,no-value-for-parameter,too-many-lines

import os
import re
import copy
import requests
import numpy as np
import pandas as pd
from taxcalc.calcfunctions import (TaxInc, SchXYZTax, GainsTax, AGIsurtax,
Expand All @@ -31,8 +28,7 @@
from taxcalc.consumption import Consumption
from taxcalc.growdiff import GrowDiff
from taxcalc.growfactors import GrowFactors
from taxcalc.utils import (json_to_dict,
DIST_VARIABLES, create_distribution_table,
from taxcalc.utils import (DIST_VARIABLES, create_distribution_table,
DIFF_VARIABLES, create_difference_table,
create_diagnostic_table,
ce_aftertax_expanded_income,
Expand Down Expand Up @@ -188,16 +184,24 @@ def total_weight(self):
"""
return self.array('s006').sum()

def dataframe(self, variable_list):
def dataframe(self, variable_list, all_vars=False):
"""
Return Pandas DataFrame containing the listed variables from embedded
Records object.
Return Pandas DataFrame containing the listed variables from the
embedded Records object. If all_vars is True, then the variable_list
is ignored and all variables used as input to and calculated by the
Calculator.calc_all() method (which does not include marginal tax
rates) are included in the returned Pandas DataFrame.
"""
assert isinstance(variable_list, list)
arys = [self.array(vname) for vname in variable_list]
dframe = pd.DataFrame(data=np.column_stack(arys),
columns=variable_list)
if all_vars:
varlist = list(self.__records.USABLE_READ_VARS |
self.__records.CALCULATED_VARS)
else:
assert isinstance(variable_list, list)
varlist = variable_list
arys = [self.array(varname) for varname in varlist]
dframe = pd.DataFrame(data=np.column_stack(arys), columns=varlist)
del arys
del varlist
return dframe

def distribution_table_dataframe(self):
Expand Down Expand Up @@ -310,7 +314,7 @@ def consump_benval_params(self):
@property
def reform_warnings(self):
"""
Calculator class embedded Policy object's reform_warnings.
Calculator class embedded Policy object's parameter_warnings.
"""
return self.__policy.parameter_warnings

Expand Down Expand Up @@ -1060,7 +1064,7 @@ def decile_graph(self, calc,
def read_json_param_objects(reform, assump):
"""
Read JSON reform and assump objects and
return a single dictionary containing four key:dict pairs:
return a composite dictionary containing four key:dict pairs:
'policy':dict, 'consumption':dict,
'growdiff_baseline':dict, and 'growdiff_response':dict.

Expand All @@ -1069,15 +1073,14 @@ def read_json_param_objects(reform, assump):
If assump is None, the dict in all the other key:dict pairs is empty.

Also note that either of the two function arguments can be strings
containing a valid JSON string (rather than a local filename),
in which case the file reading is skipped and the appropriate
read_json_*_text method is called.
containing a valid JSON string (rather than a local filename).

Either of the two function arguments can also be a valid URL string
beginning with 'http' and pointing to a valid JSON file hosted online.

The reform file/URL contents or JSON string must be like this:
{"policy": {...}}
{"policy": {...}} OR {...}
(in other words, the top-level policy key is optional)
and the assump file/URL contents or JSON string must be like this:
{"consumption": {...},
"growdiff_baseline": {...},
Expand All @@ -1098,59 +1101,12 @@ def read_json_param_objects(reform, assump):
The 'growdiff_response' subdictionary of the returned dictionary is
suitable as input into the GrowDiff.update_growdiff method.
"""
# pylint: disable=too-many-branches
# first process the second assump argument
if assump is None:
cons_dict = dict()
gdiff_base_dict = dict()
gdiff_resp_dict = dict()
elif isinstance(assump, str):
if os.path.isfile(assump):
if not assump.endswith('.json'):
msg = "assump does not end with '.json': {}"
raise ValueError(msg.format(assump))
txt = open(assump, 'r').read()
elif assump.startswith('http'):
if not assump.endswith('.json'):
msg = "assump does not end with '.json': {}"
raise ValueError(msg.format(assump))
req = requests.get(assump)
req.raise_for_status()
txt = req.text
else:
txt = assump
(cons_dict,
gdiff_base_dict,
gdiff_resp_dict) = Calculator._read_json_econ_assump_text(txt)
else:
raise ValueError('assump is neither None nor string')
# next process the first reform argument
if reform is None:
rpol_dict = dict()
elif isinstance(reform, str):
if os.path.isfile(reform):
if not reform.endswith('.json'):
msg = "reform does not end with '.json': {}"
raise ValueError(msg.format(reform))
txt = open(reform, 'r').read()
elif reform.startswith('http'):
if not reform.endswith('.json'):
msg = "reform does not end with '.json': {}"
raise ValueError(msg.format(reform))
req = requests.get(reform)
req.raise_for_status()
txt = req.text
else:
txt = reform
rpol_dict = Calculator._read_json_policy_reform_text(txt)
else:
raise ValueError('reform is neither None nor string')
# construct single composite dictionary
# construct the composite dictionary
param_dict = dict()
param_dict['policy'] = rpol_dict
param_dict['consumption'] = cons_dict
param_dict['growdiff_baseline'] = gdiff_base_dict
param_dict['growdiff_response'] = gdiff_resp_dict
param_dict['policy'] = Policy.read_json_reform(reform)
param_dict['consumption'] = Consumption.read_json_update(assump)
for topkey in ['growdiff_baseline', 'growdiff_response']:
param_dict[topkey] = GrowDiff.read_json_update(assump, topkey)
# return the composite dictionary
return param_dict

Expand Down Expand Up @@ -1481,108 +1437,3 @@ def _calc_one_year(self, zero_out_calc_vars=False):
C1040(self.__policy, self.__records)
CTC_new(self.__policy, self.__records)
IITAX(self.__policy, self.__records)

@staticmethod
def _read_json_policy_reform_text(text_string):
    """
    Parse commented JSON policy-reform text into one reform dictionary.

    The text must be JSON whose top-level keys are exactly those in
    Calculator.REQUIRED_REFORM_KEYS (a single "policy": {...} pair);
    a missing required key or any other top-level key (for example,
    "consumption", "growdiff_baseline", or "growdiff_response") raises
    a ValueError.

    The "policy" object may be empty ({}) or may map parameter names to
    objects keyed by string years.  See test_json_reform_url() in
    tests/test_calculator.py for an extended example of commented JSON
    policy reform text.

    Returns the "policy" object converted to param:year:value form with
    integer years, which is the form expected by the
    Policy.implement_reform method.
    """
    # blank out every //-comment through the end of its line; newlines are
    # not matched by '.', so the line count of the text is unchanged
    uncommented = re.sub('//.*', ' ', text_string)
    parsed = json_to_dict(uncommented)
    # compare the top-level keys with the required set in both directions
    required = Calculator.REQUIRED_REFORM_KEYS
    found = set(parsed.keys())
    absent = required - found
    if absent:
        raise ValueError(
            'required key(s) "{}" missing from policy reform file'
            .format(absent)
        )
    unexpected = found - required
    if unexpected:
        raise ValueError(
            'illegal key(s) "{}" in policy reform file'
            .format(unexpected)
        )
    # string years become integer years in the returned dictionary
    return Calculator._convert_year_to_int(parsed['policy'])

@staticmethod
def _read_json_econ_assump_text(text_string):
    """
    Parse commented JSON economic-assumption text into three dictionaries.

    The text must be JSON whose top-level keys are exactly those in
    Calculator.REQUIRED_ASSUMP_KEYS:
    a "consumption": {...} pair,
    a "growdiff_baseline": {...} pair, and
    a "growdiff_response": {...} pair.
    A missing required key or any other top-level key (for example,
    "policy") raises a ValueError.

    Each {...} object may be empty ({}) or may map parameter names to
    objects keyed by string years.  See test_json_assump_url() in
    tests/test_calculator.py for an extended example of commented JSON
    economic assumption text.

    Returns a (cons_dict, gdiff_baseline_dict, gdiff_response_dict) tuple
    in which every dictionary has param:year:value form with integer
    years, which is the form expected by the
    Consumption.update_consumption and GrowDiff.update_growdiff methods.
    """
    # blank out every //-comment through the end of its line; newlines are
    # not matched by '.', so the line count of the text is unchanged
    uncommented = re.sub('//.*', ' ', text_string)
    parsed = json_to_dict(uncommented)
    # compare the top-level keys with the required set in both directions
    required = Calculator.REQUIRED_ASSUMP_KEYS
    found = set(parsed.keys())
    absent = required - found
    if absent:
        raise ValueError(
            'required key(s) "{}" missing from economic assumption file'
            .format(absent)
        )
    unexpected = found - required
    if unexpected:
        raise ValueError(
            'illegal key(s) "{}" in economic assumption file'
            .format(unexpected)
        )
    # convert each section, preserving the documented tuple order
    sections = ('consumption', 'growdiff_baseline', 'growdiff_response')
    to_int_years = Calculator._convert_year_to_int
    return tuple(to_int_years(parsed[section]) for section in sections)

@staticmethod
def _convert_year_to_int(syr_dict):
"""
Converts specified syr_dict, which has string years as secondary
keys, into a dictionary with the same structure but having integer
years as secondary keys.
"""
iyr_dict = dict()
for pkey, sdict in syr_dict.items():
assert isinstance(pkey, str)
assert pkey not in iyr_dict # will catch duplicate primary keys
iyr_dict[pkey] = dict()
assert isinstance(sdict, dict)
for skey, val in sdict.items():
assert isinstance(skey, str)
year = int(skey)
assert year not in iyr_dict[pkey] # will catch duplicate years
iyr_dict[pkey][year] = val
return iyr_dict
10 changes: 10 additions & 0 deletions taxcalc/consumption.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ def __init__(self):
self.initialize(Consumption.JSON_START_YEAR,
Consumption.DEFAULT_NUM_YEARS)

@staticmethod
def read_json_update(obj):
    """
    Read a consumption revision from the specified JSON object.

    The obj argument can be None or a string containing a local
    filename, a URL beginning with 'http' pointing to a valid JSON file
    hosted online, or valid JSON text.

    Returns the revision dictionary produced by
    Parameters._read_json_revision for the 'consumption' top-level key,
    which is suitable for use with the update_consumption method.
    """
    revision = Parameters._read_json_revision(obj, 'consumption')
    return revision

def update_consumption(self, revision,
print_warnings=True, raise_errors=True):
"""
Expand Down
11 changes: 11 additions & 0 deletions taxcalc/growdiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,17 @@ def __init__(self):
self.initialize(GrowDiff.JSON_START_YEAR,
GrowDiff.DEFAULT_NUM_YEARS)

@staticmethod
def read_json_update(obj, topkey):
"""
Return a revision dictionary suitable for use with update_growdiff
method generated from the specified JSON object, which can be None or
a string containing a local filename, a URL beginning with 'http'
pointing to a valid JSON file hosted online, or a valid JSON text.
"""
assert topkey in ('growdiff_baseline', 'growdiff_response')
return Parameters._read_json_revision(obj, topkey)

def update_growdiff(self, revision,
print_warnings=True, raise_errors=True):
"""
Expand Down
71 changes: 71 additions & 0 deletions taxcalc/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
# pylint: disable=attribute-defined-outside-init,no-member

import os
import re
import abc
from collections import OrderedDict
import requests
import numpy as np
from taxcalc.utils import read_egg_json, json_to_dict

Expand Down Expand Up @@ -848,3 +850,72 @@ def _apply_cpi_offset_in_revision(self, revision):
if name not in known_years:
known_years[name] = kyrs_not_in_revision
return known_years

@staticmethod
def _read_json_revision(obj, topkey):
"""
Read JSON revision specified by obj and topkey
returning a single revision dictionary suitable for
use with the Parameters._update method.

The obj function argument can be None or a string, where the
string contains a local filename, a URL beginning with 'http'
pointing to a valid JSON file hosted online, or valid JSON
text.

The topkey argument must be a string containing the top-level
key in a compound-revision JSON text for which a revision
dictionary is returned. If the specified topkey is not among
the top-level JSON keys, the obj is assumed to be a
non-compound-revision JSON text for the specified topkey.
"""
# embedded function used only in _read_json_revision staticmethod
def convert_year_to_int(syr_dict):
"""
Converts specified syr_dict, which has string years as secondary
keys, into a dictionary with the same structure but having integer
years as secondary keys.
"""
iyr_dict = dict()
for pkey, sdict in syr_dict.items():
assert isinstance(pkey, str)
iyr_dict[pkey] = dict()
assert isinstance(sdict, dict)
for skey, val in sdict.items():
assert isinstance(skey, str)
year = int(skey)
iyr_dict[pkey][year] = val
return iyr_dict
# end of embedded function
# process the main function arguments
if obj is None:
return dict()
if not isinstance(obj, str):
raise ValueError('obj is neither None nor a string')
if not isinstance(topkey, str):
raise ValueError('topkey={} is not a string'.format(topkey))
if os.path.isfile(obj):
if not obj.endswith('.json'):
msg = 'obj does not end with ".json": {}'
raise ValueError(msg.format(obj))
txt = open(obj, 'r').read()
elif obj.startswith('http'):
if not obj.endswith('.json'):
msg = 'obj does not end with ".json": {}'
raise ValueError(msg.format(obj))
req = requests.get(obj)
req.raise_for_status()
txt = req.text
else:
txt = obj
# strip out //-comments without changing line numbers
json_txt = re.sub('//.*', ' ', txt)
# convert JSON text into a Python dictionary
full_dict = json_to_dict(json_txt)
# check top-level key contents of dictionary
if topkey in full_dict.keys():
single_dict = full_dict[topkey]
else:
single_dict = full_dict
# convert string year to integer year in dictionary and return
return convert_year_to_int(single_dict)
10 changes: 10 additions & 0 deletions taxcalc/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,16 @@ def wage_growth_rates(self):
"""
return self._wage_growth_rates

@staticmethod
def read_json_reform(obj):
    """
    Read a policy reform from the specified JSON object.

    The obj argument can be None or a string containing a local
    filename, a URL beginning with 'http' pointing to a valid JSON file
    hosted online, or valid JSON text.

    Returns the reform dictionary produced by
    Parameters._read_json_revision for the 'policy' top-level key,
    which is suitable for use with the implement_reform method.
    """
    reform = Parameters._read_json_revision(obj, 'policy')
    return reform

def implement_reform(self, reform,
print_warnings=True, raise_errors=True):
"""
Expand Down
Loading