diff --git a/tests/conftest.py b/tests/conftest.py index 76c291f..9450c10 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,6 +8,7 @@ CPS_START_YEAR = 2014 PUF_START_YEAR = 2011 PUF_COUNT = 239002 +LAST_YEAR = 2027 @pytest.fixture(scope='session') @@ -77,6 +78,11 @@ def puf_start_year(): return PUF_START_YEAR +@pytest.fixture(scope='session') +def last_year(): + return LAST_YEAR + + @pytest.fixture(scope='session') def cps_weights(test_path): cpsw_path = os.path.join(test_path, '../cps_stage2/cps_weights.csv.gz') @@ -113,3 +119,9 @@ def puf_benefits(test_path): # pufb_path = os.path.join(test_path, '../puf_stage4/puf_benefits.csv.gz') # return pd.read_csv(pufb_path) return None + + +@pytest.fixture(scope='session') +def growth_rates(test_path): + gr_path = os.path.join(test_path, '../cps_stage4/growth_rates.csv') + return pd.read_csv(gr_path, index_col=0) diff --git a/tests/test_benefits.py b/tests/test_benefits.py index cdf190f..154a26c 100644 --- a/tests/test_benefits.py +++ b/tests/test_benefits.py @@ -3,6 +3,7 @@ """ import pytest import numpy as np +import pandas as pd @pytest.mark.parametrize('kind', ['cps']) @@ -74,3 +75,125 @@ def test_benefits(kind, cps_benefits, puf_benefits, if num_allzeros > 0: msg = 'number {} records with all zero benefits in every year = {}' raise ValueError(msg.format(kind, num_allzeros)) + + +@pytest.mark.parametrize('kind', ['cps']) +def test_extrapolated_benefits(kind, cps_benefits, puf_benefits, + cps, puf, cps_weights, puf_weights, + cps_start_year, puf_start_year, + cps_count, puf_count, + growfactors, growth_rates, last_year): + """ + Compare actual and target extrapolated benefit amounts and counts. + (Note that there are no puf_benefits data.) 
+ """ + rtol_amt = 0.13 + rtol_cnt = 0.15 + dump_res = False + # specify several DataFrames and related parameters + if kind == 'cps': + basedata = cps + benefits = cps_benefits + weights = cps_weights + first_year = cps_start_year + data_count = cps_count + elif kind == 'puf': + basedata = puf + benefits = puf_benefits + weights = puf_weights + first_year = puf_start_year + data_count = puf_count + raise ValueError('illegal kind={}'.format(kind)) + else: + raise ValueError('illegal kind={}'.format(kind)) + benefit_names = ['ssi', 'mcare', 'mcaid', 'snap', 'wic', + 'tanf', 'vet', 'housing'] + # expand benefits DataFrame to include those who don't receive benefits + recid_df = pd.DataFrame({'RECID': basedata.RECID}) + full_benefits = recid_df.merge(benefits, on='RECID', how='left') + full_benefits.fillna(0, inplace=True) + assert len(recid_df.index) == len(full_benefits.index) + extrapolated_benefits = full_benefits.astype(np.float32) + del recid_df + del full_benefits + assert len(extrapolated_benefits.index) == data_count + # compute benefit amounts and counts for first_year + fyr_amount = dict() + fyr_count = dict() + wght = basedata['s006'] * 0.01 + for bname in benefit_names: + ben = basedata['{}_ben'.format(bname)] + benamt = (ben * wght).sum() * 1e-9 + fyr_amount[bname] = round(benamt, 3) + bencnt = wght[ben > 0].sum() * 1e-6 + fyr_count[bname] = round(bencnt, 3) + if dump_res: + benavg = benamt / bencnt + res = '{} {}\t{:8.3f}{:8.3f}{:8.1f}'.format(first_year, bname, + benamt, bencnt, benavg) + print(res) + # compare actual and target amounts/counts for each subsequent year + differences = False + for year in range(first_year + 1, last_year + 1): + # compute actual amounts/counts for year + wght = weights['WT{}'.format(year)] * 0.01 + actual_amount = dict() + actual_count = dict() + for bname in benefit_names: + ben = extrapolated_benefits['{}_{}'.format(bname, year)] + assert len(ben.index) == len(wght.index) + benamt = (ben * wght).sum() * 1e-9 
+ actual_amount[bname] = round(benamt, 3) + bencnt = wght[ben > 0].sum() * 1e-6 + actual_count[bname] = round(bencnt, 3) + if dump_res: + benavg = benamt / bencnt + res = '{} {}\t{:8.3f}{:8.3f}{:8.1f} A'.format(year, bname, + benamt, bencnt, + benavg) + print(res) + # compute target amounts/counts for year + target_amount = dict() + target_count = dict() + for bname in benefit_names: + benfyr = fyr_amount[bname] + col = '{}_benefit_growth'.format(bname) + benfactor = 1.0 + growth_rates.loc[year, col] + benamt = benfyr * benfactor + target_amount[bname] = round(benamt, 3) + cntfyr = fyr_count[bname] + col = '{}_participation_growth'.format(bname) + cntfactor = 1.0 + growth_rates.loc[year, col] + bencnt = cntfyr * cntfactor + target_count[bname] = round(bencnt, 3) + if dump_res: + benavg = benamt / bencnt + res = '{} {}\t{:8.3f}{:8.3f}{:8.1f} T'.format(year, bname, + benamt, bencnt, + benavg) + print(res) + # compare actual and target amounts/counts for year + for bname in benefit_names: + if not np.allclose([actual_amount[bname]], + [target_amount[bname]], + atol=0.0, rtol=rtol_amt): + differences = True + reldiff = actual_amount[bname] / target_amount[bname] - 1.0 + msg = '{} {}\tAMT\t{:9.3f}{:9.3f}{:8.1f}' + print(msg.format(year, bname, + actual_amount[bname], + target_amount[bname], + reldiff * 100)) + if not np.allclose([actual_count[bname]], + [target_count[bname]], + atol=0.0, rtol=rtol_cnt): + differences = True + reldiff = actual_count[bname] / target_count[bname] - 1.0 + msg = '{} {}\tCNT\t{:9.3f}{:9.3f}{:8.1f}' + print(msg.format(year, bname, + actual_count[bname], + target_count[bname], + reldiff * 100)) + # end of year loop + if differences: + assert 'differences is' == 'True'