PSLmodels · martinholmer · Jul 12, 2018 · Jul 11, 2018 · Jul 11, 2018 · Jul 11, 2018
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -8,6 +8,7 @@
 CPS_START_YEAR = 2014
 PUF_START_YEAR = 2011
 PUF_COUNT = 239002
+LAST_YEAR = 2027  # value returned by the last_year fixture
 
 
 @pytest.fixture(scope='session')
@@ -77,6 +78,11 @@ def puf_start_year():
     return PUF_START_YEAR
 
 
+@pytest.fixture(scope='session')
+def last_year():
+    return LAST_YEAR  # upper bound of years checked by extrapolation tests
+
+
 @pytest.fixture(scope='session')
 def cps_weights(test_path):
     cpsw_path = os.path.join(test_path, '../cps_stage2/cps_weights.csv.gz')
@@ -113,3 +119,9 @@ def puf_benefits(test_path):
     # pufb_path = os.path.join(test_path, '../puf_stage4/puf_benefits.csv.gz')
     # return pd.read_csv(pufb_path)
     return None
+
+
+@pytest.fixture(scope='session')
+def growth_rates(test_path):
+    gr_path = os.path.join(test_path, '../cps_stage4/growth_rates.csv')
+    return pd.read_csv(gr_path, index_col=0)  # first CSV column is the index
diff --git a/tests/test_benefits.py b/tests/test_benefits.py
@@ -3,6 +3,7 @@
 """
 import pytest
 import numpy as np
+import pandas as pd
 
 
 @pytest.mark.parametrize('kind', ['cps'])
@@ -74,3 +75,125 @@ def test_benefits(kind, cps_benefits, puf_benefits,
     if num_allzeros > 0:
         msg = 'number {} records with all zero benefits in every year = {}'
         raise ValueError(msg.format(kind, num_allzeros))
+
+
+@pytest.mark.parametrize('kind', ['cps'])
+def test_extrapolated_benefits(kind, cps_benefits, puf_benefits,
+                               cps, puf, cps_weights, puf_weights,
+                               cps_start_year, puf_start_year,
+                               cps_count, puf_count,
+                               growfactors, growth_rates, last_year):
+    """
+    Compare actual extrapolated benefit amounts/counts for each year after
+    the first with growth_rates targets (there are no puf_benefits data).
+    """
+    rtol_amt = 0.13  # relative tolerance for benefit amounts
+    rtol_cnt = 0.15  # relative tolerance for benefit counts
+    dump_res = False  # set True to print each amount, count and average
+    # specify several DataFrames and related parameters
+    if kind == 'cps':
+        basedata = cps
+        benefits = cps_benefits
+        weights = cps_weights
+        first_year = cps_start_year
+        data_count = cps_count
+    elif kind == 'puf':
+        basedata = puf
+        benefits = puf_benefits
+        weights = puf_weights
+        first_year = puf_start_year
+        data_count = puf_count
+        raise ValueError('illegal kind={}'.format(kind))  # no puf benefits
+    else:
+        raise ValueError('illegal kind={}'.format(kind))
+    benefit_names = ['ssi', 'mcare', 'mcaid', 'snap', 'wic',
+                     'tanf', 'vet', 'housing']
+    # expand benefits DataFrame to include those who don't receive benefits
+    recid_df = pd.DataFrame({'RECID': basedata.RECID})
+    full_benefits = recid_df.merge(benefits, on='RECID', how='left')
+    full_benefits.fillna(0, inplace=True)
+    assert len(recid_df.index) == len(full_benefits.index)
+    extrapolated_benefits = full_benefits.astype(np.float32)
+    del recid_df
+    del full_benefits
+    assert len(extrapolated_benefits.index) == data_count
+    # compute first_year weighted benefit amounts (x1e-9) and counts (x1e-6)
+    fyr_amount = dict()
+    fyr_count = dict()
+    wght = basedata['s006'] * 0.01  # assumes s006 is weight * 100 (confirm)
+    for bname in benefit_names:
+        ben = basedata['{}_ben'.format(bname)]
+        benamt = (ben * wght).sum() * 1e-9
+        fyr_amount[bname] = round(benamt, 3)
+        bencnt = wght[ben > 0].sum() * 1e-6  # weight of records with ben > 0
+        fyr_count[bname] = round(bencnt, 3)
+        if dump_res:
+            benavg = benamt / bencnt
+            res = '{} {}\t{:8.3f}{:8.3f}{:8.1f}'.format(first_year, bname,
+                                                        benamt, bencnt, benavg)
+            print(res)
+    # compare actual and target amounts/counts for each subsequent year
+    differences = False  # becomes True if any comparison fails
+    for year in range(first_year + 1, last_year + 1):
+        # compute actual amounts/counts for year
+        wght = weights['WT{}'.format(year)] * 0.01
+        actual_amount = dict()
+        actual_count = dict()
+        for bname in benefit_names:
+            ben = extrapolated_benefits['{}_{}'.format(bname, year)]
+            assert len(ben.index) == len(wght.index)
+            benamt = (ben * wght).sum() * 1e-9
+            actual_amount[bname] = round(benamt, 3)
+            bencnt = wght[ben > 0].sum() * 1e-6
+            actual_count[bname] = round(bencnt, 3)
+            if dump_res:
+                benavg = benamt / bencnt
+                res = '{} {}\t{:8.3f}{:8.3f}{:8.1f} A'.format(year, bname,
+                                                              benamt, bencnt,
+                                                              benavg)
+                print(res)
+        # compute target amounts/counts: first-year value * (1 + growth rate)
+        target_amount = dict()
+        target_count = dict()
+        for bname in benefit_names:
+            benfyr = fyr_amount[bname]
+            col = '{}_benefit_growth'.format(bname)
+            benfactor = 1.0 + growth_rates.loc[year, col]
+            benamt = benfyr * benfactor
+            target_amount[bname] = round(benamt, 3)
+            cntfyr = fyr_count[bname]
+            col = '{}_participation_growth'.format(bname)
+            cntfactor = 1.0 + growth_rates.loc[year, col]
+            bencnt = cntfyr * cntfactor
+            target_count[bname] = round(bencnt, 3)
+            if dump_res:
+                benavg = benamt / bencnt
+                res = '{} {}\t{:8.3f}{:8.3f}{:8.1f} T'.format(year, bname,
+                                                              benamt, bencnt,
+                                                              benavg)
+                print(res)
+        # compare actual and target amounts/counts for year
+        for bname in benefit_names:
+            if not np.allclose([actual_amount[bname]],
+                               [target_amount[bname]],
+                               atol=0.0, rtol=rtol_amt):
+                differences = True
+                reldiff = actual_amount[bname] / target_amount[bname] - 1.0
+                msg = '{} {}\tAMT\t{:9.3f}{:9.3f}{:8.1f}'
+                print(msg.format(year, bname,
+                                 actual_amount[bname],
+                                 target_amount[bname],
+                                 reldiff * 100))
+            if not np.allclose([actual_count[bname]],
+                               [target_count[bname]],
+                               atol=0.0, rtol=rtol_cnt):
+                differences = True
+                reldiff = actual_count[bname] / target_count[bname] - 1.0
+                msg = '{} {}\tCNT\t{:9.3f}{:9.3f}{:8.1f}'
+                print(msg.format(year, bname,
+                                 actual_count[bname],
+                                 target_count[bname],
+                                 reldiff * 100))
+    # end of year loop
+    if differences:
+        assert 'differences is' == 'True'  # always false: fails the test