Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Impute elective DC pension contributions in PUF data #279

Merged
merged 13 commits on Sep 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ puf_data/cps-matched-puf.csv: $(PM_PY_FILES) \

puf_data/puf.csv: puf_data/finalprep.py \
puf_data/impute_itmexp.py \
puf_data/impute_pencon.py \
puf_data/cps-matched-puf.csv
cd puf_data ; python finalprep.py

Expand Down
Binary file modified cps_data/cps.csv.gz
Binary file not shown.
15 changes: 9 additions & 6 deletions cps_data/finalprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,17 +124,21 @@ def main():
data = add_agi_bin(data, 'INCOME')
print('Adjusting distribution')
data = adjust(data, adj_targets)
print('Adding Benefits Data')
print('Adding benefits variables')
data = benefits(data, other_ben)
print('Dropping unused variables')
data = drop_vars(data)
print('Adding zero pencon_p and pencon_s variables')
data['pencon_p'] = np.zeros(len(data.index), dtype=np.int32)
data['pencon_s'] = np.zeros(len(data.index), dtype=np.int32)

data = data.fillna(0.)
data = data.astype(np.int32)
data['e00200'] = data['e00200p'] + data['e00200s']
data['e00900'] = data['e00900p'] + data['e00900s']
data['e02100'] = data['e02100p'] + data['e02100s']
data['s006'] *= 100

print('Exporting...')
data.to_csv('cps.csv', index=False)
subprocess.check_call(["gzip", "-nf", "cps.csv"])
Expand Down Expand Up @@ -344,7 +348,7 @@ def benefits(data, other_ben):
benefits variable.
Replaces Medicare and Medicaid values with set amounts
"""
# replace medicare and medicaid
# Replace Medicare and Medicaid
medicare_cols = 'MCARE_VAL' + pd.Series((np.arange(15) + 1).astype(str))
medicaid_cols = 'MCAID_VAL' + pd.Series((np.arange(15) + 1).astype(str))
count_medicare = data[medicare_cols].astype(bool).sum(axis=1)
Expand All @@ -367,13 +371,12 @@ def benefits(data, other_ben):
data['snap_ben'])
data['ratio'] = (data['dist_ben'] * data['s006'] /
(data['dist_ben'] * data['s006']).sum())
# remove TANF and WIC from other_ben
# ... remove TANF and WIC from other_ben total
tanf = (data['tanf_ben'] * data['s006']).sum()
wic = (data['wic_ben'] * data['s006']).sum()
other_ben_total = other_ben['2014_cost'].sum() - tanf - wic
# divide by the weight to account for weighting in Tax-Calculator
data['other_ben'] = (data['ratio'] * other_ben_total /
data['s006'])
# ... divide by the weight to account for weighting in Tax-Calculator
data['other_ben'] = (data['ratio'] * other_ben_total / data['s006'])

# Convert benefit data to integers
data['mcaid_ben'] = data['mcaid_ben'].astype(np.int32)
Expand Down
4 changes: 4 additions & 0 deletions puf_data/finalprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
import pandas
from impute_itmexp import impute_itemized_expenses
from impute_pencon import impute_pension_contributions


BENPUF = False # set temporarily to True to generate a benpuf.csv file
Expand Down Expand Up @@ -72,6 +73,9 @@ def main():
# - Impute itemized expense amounts for non-itemizers:
data = impute_itemized_expenses(data.copy())

# - Impute pension contributions:
data = impute_pension_contributions(data.copy())

# - Write processed data to the final CSV-formatted file:
if BENPUF:
data.to_csv('benpuf.csv', index=False)
Expand Down
372 changes: 372 additions & 0 deletions puf_data/impute_pencon.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions tests/cps_agg_expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ nu05 45400 0 4
nu13 163487 0 10
nu18 281400 0 12
other_ben 462782071 0 40211
pencon_p 0 0 0
pencon_s 0 0 0
s006 16296813000 100 561900
snap_ben 141713659 0 26569
ssi_ben 103019670 0 64378
Expand Down
8 changes: 5 additions & 3 deletions tests/puf_agg_expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ age_head 11691928 1 85
age_spouse 6247637 0 97
agi_bin 1890431 0 18
cmbtp 4316456617 -23842830 83355100
e00200 39430408391 0 56530000
e00200p 23634573247 0 28265000
e00200s 15795835144 0 28265000
e00200 39430402348 0 56530000
e00200p 23634568315 0 28265000
e00200s 15795834033 0 28265000
e00300 3551458380 0 29460000
e00400 2471800692 0 14350000
e00600 5859462558 0 37050000
Expand Down Expand Up @@ -85,4 +85,6 @@ nu18 149661 0 6
p08000 829842 0 31600
p22250 -603210228 -124900000 39410000
p23250 23317338986 -28160000 91220000
pencon_p 442334767 0 16500
pencon_s 273078831 0 16500
s006 16355017483 1 1043269
8 changes: 7 additions & 1 deletion tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ def relationships(data, dataname):
m = less_than_str.format(dataname, 'e01700', 'e01500')
assert np.all(data['e01500'] >= data['e01700']), m

m = less_than_str.format(dataname, 'pencon_p', 'e00200p+pencon_p')
assert np.all((data['e00200p'] + data['pencon_p']) >= data['pencon_p']), m

m = less_than_str.format(dataname, 'pencon_s', 'e00200s+pencon_s')
assert np.all((data['e00200s'] + data['pencon_s']) >= data['pencon_s']), m


def variable_check(test_path, data, dataname):
"""
Expand Down Expand Up @@ -188,7 +194,7 @@ def check_cps_benefits(data):
'wic': 4972,
'tanf': 159407, # <--- SEEMS ABSURD ($13,284/month)
'housing': 53253,
'vet': 169920, # <--- HIGH ($14,160/month)VA hospital costs or what?
'vet': 169920, # <--- HIGH ($14,160/month) military pension or what?
'other': 40211
}
# .. minimum value per filing unit for positive benefit
Expand Down