Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add benefit test that compares actual and target benefit amounts/counts #246

Merged
merged 4 commits into from
Jul 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
CPS_START_YEAR = 2014
PUF_START_YEAR = 2011
PUF_COUNT = 239002
# final year (inclusive) of the extrapolated-benefits checks;
# exposed to tests through the last_year fixture
LAST_YEAR = 2027


@pytest.fixture(scope='session')
Expand Down Expand Up @@ -77,6 +78,11 @@ def puf_start_year():
return PUF_START_YEAR


@pytest.fixture(scope='session')
def last_year():
    """
    Fixture that supplies the module-level LAST_YEAR constant to tests.
    """
    return LAST_YEAR


@pytest.fixture(scope='session')
def cps_weights(test_path):
cpsw_path = os.path.join(test_path, '../cps_stage2/cps_weights.csv.gz')
Expand Down Expand Up @@ -113,3 +119,9 @@ def puf_benefits(test_path):
# pufb_path = os.path.join(test_path, '../puf_stage4/puf_benefits.csv.gz')
# return pd.read_csv(pufb_path)
return None


@pytest.fixture(scope='session')
def growth_rates(test_path):
    """
    Fixture for the cps_stage4 growth_rates.csv file, read into a
    DataFrame whose index is the first column of the file.
    """
    csv_location = os.path.join(test_path, '../cps_stage4/growth_rates.csv')
    rates = pd.read_csv(csv_location, index_col=0)
    return rates
123 changes: 123 additions & 0 deletions tests/test_benefits.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
import pytest
import numpy as np
import pandas as pd


@pytest.mark.parametrize('kind', ['cps'])
Expand Down Expand Up @@ -74,3 +75,125 @@ def test_benefits(kind, cps_benefits, puf_benefits,
if num_allzeros > 0:
msg = 'number {} records with all zero benefits in every year = {}'
raise ValueError(msg.format(kind, num_allzeros))


def _weighted_amount_and_count(ben, wght):
    """
    Return (amount, count) for one benefit: amount is the weighted sum of
    ben scaled by 1e-9 and count is the sum of the weights of the records
    with a positive ben value scaled by 1e-6.
    """
    amount = (ben * wght).sum() * 1e-9
    count = wght[ben > 0].sum() * 1e-6
    return amount, count


@pytest.mark.parametrize('kind', ['cps'])
def test_extrapolated_benefits(kind, cps_benefits, puf_benefits,
                               cps, puf, cps_weights, puf_weights,
                               cps_start_year, puf_start_year,
                               cps_count, puf_count,
                               growfactors, growth_rates, last_year):
    """
    Compare actual and target extrapolated benefit amounts and counts.
    (Note that there are no puf_benefits data.)

    For each benefit and each year after the first year through last_year,
    the actual weighted amount/count is computed from the extrapolated
    benefits and that year's WT<year> weights.  The target is the
    first-year amount/count times one plus the growth_rates entry for
    that year in the <benefit>_benefit_growth (amounts) or
    <benefit>_participation_growth (counts) column.  Each pair that is
    not within the relative tolerance is printed.

    Raises ValueError when kind is not 'cps' or when any actual value
    differs from its target by more than the relative tolerance.
    """
    rtol_amt = 0.13
    rtol_cnt = 0.15
    dump_res = False  # set to True to print every computed amount/count
    # specify several DataFrames and related parameters
    if kind == 'cps':
        basedata = cps
        benefits = cps_benefits
        weights = cps_weights
        first_year = cps_start_year
        data_count = cps_count
    else:
        # 'puf' is rejected like any unknown kind: no puf_benefits data
        raise ValueError('illegal kind={}'.format(kind))
    benefit_names = ['ssi', 'mcare', 'mcaid', 'snap', 'wic',
                     'tanf', 'vet', 'housing']
    # expand benefits DataFrame to include those who don't receive benefits
    recid_df = pd.DataFrame({'RECID': basedata.RECID})
    full_benefits = recid_df.merge(benefits, on='RECID', how='left')
    full_benefits.fillna(0, inplace=True)
    # a left merge must not add or drop records
    assert len(recid_df.index) == len(full_benefits.index)
    extrapolated_benefits = full_benefits.astype(np.float32)
    del recid_df
    del full_benefits
    assert len(extrapolated_benefits.index) == data_count
    dump_fmt = '{} {}\t{:8.3f}{:8.3f}{:8.1f}'
    # compute benefit amounts and counts for first_year
    fyr_amount = dict()
    fyr_count = dict()
    # NOTE(review): the 0.01 factor suggests weights are stored scaled
    # by 100 -- confirm against the data documentation
    wght = basedata['s006'] * 0.01
    for bname in benefit_names:
        ben = basedata['{}_ben'.format(bname)]
        benamt, bencnt = _weighted_amount_and_count(ben, wght)
        fyr_amount[bname] = round(benamt, 3)
        fyr_count[bname] = round(bencnt, 3)
        if dump_res:
            print(dump_fmt.format(first_year, bname,
                                  benamt, bencnt, benamt / bencnt))
    # compare actual and target amounts/counts for each subsequent year
    differences = []
    diff_fmt = '{} {}\t{}\t{:9.3f}{:9.3f}{:8.1f}'
    for year in range(first_year + 1, last_year + 1):
        # compute actual amounts/counts for year
        wght = weights['WT{}'.format(year)] * 0.01
        actual_amount = dict()
        actual_count = dict()
        for bname in benefit_names:
            ben = extrapolated_benefits['{}_{}'.format(bname, year)]
            assert len(ben.index) == len(wght.index)
            benamt, bencnt = _weighted_amount_and_count(ben, wght)
            actual_amount[bname] = round(benamt, 3)
            actual_count[bname] = round(bencnt, 3)
            if dump_res:
                print((dump_fmt + ' A').format(year, bname,
                                               benamt, bencnt,
                                               benamt / bencnt))
        # compute target amounts/counts for year
        target_amount = dict()
        target_count = dict()
        for bname in benefit_names:
            col = '{}_benefit_growth'.format(bname)
            benfactor = 1.0 + growth_rates.loc[year, col]
            benamt = fyr_amount[bname] * benfactor
            target_amount[bname] = round(benamt, 3)
            col = '{}_participation_growth'.format(bname)
            cntfactor = 1.0 + growth_rates.loc[year, col]
            bencnt = fyr_count[bname] * cntfactor
            target_count[bname] = round(bencnt, 3)
            if dump_res:
                print((dump_fmt + ' T').format(year, bname,
                                               benamt, bencnt,
                                               benamt / bencnt))
        # compare actual and target amounts/counts for year
        for bname in benefit_names:
            checks = [
                ('AMT', actual_amount[bname], target_amount[bname], rtol_amt),
                ('CNT', actual_count[bname], target_count[bname], rtol_cnt),
            ]
            for label, actual, target, rtol in checks:
                if np.allclose([actual], [target], atol=0.0, rtol=rtol):
                    continue
                reldiff = actual / target - 1.0
                line = diff_fmt.format(year, bname, label,
                                       actual, target, reldiff * 100)
                print(line)
                differences.append(line)
    # end of year loop
    if differences:
        msg = ('number of {} actual-vs-target benefit differences '
               'exceeding tolerance = {}')
        raise ValueError(msg.format(kind, len(differences)))