diff --git a/taxcalc/calculate.py b/taxcalc/calculate.py index ffea01a27..f8439bc8d 100644 --- a/taxcalc/calculate.py +++ b/taxcalc/calculate.py @@ -398,14 +398,12 @@ def distribution_tables(self, calc, result_type='weighted_sum'): """ Get results from self and calc, sort them based on groupby using - income_measure, manipulate grouped statistics based on result_type, + income_measure, compute grouped statistics based on result_type, and return tables as a pair of Pandas dataframes. This method leaves the Calculator object(s) unchanged. Note that the returned tables have consistent income groups (based on the self income_measure) even though the baseline income_measure in self and the income_measure in calc are different. - Also, note that some subgroups may contain filing units with negative - or zero baseline (self) income. Parameters ---------- @@ -414,29 +412,37 @@ def distribution_tables(self, calc, if calc is None, the second returned table is None groupby : String object - options for input: 'weighted_deciles', 'webapp_income_bins', + options for input: 'weighted_deciles', 'standard_income_bins', 'large_income_bins', 'small_income_bins'; - determines how the columns in returned tables are sorted - NOTE: when groupby is 'weighted_deciles', the returned table has three - extra rows containing top-decile detail consisting of statistics - for the 0.90-0.95 quantile range (bottom half of top decile), - for the 0.95-0.99 quantile range, and - for the 0.99-1.00 quantile range (top one percent). 
+ determines how the columns in resulting Pandas DataFrame are sorted income_measure : String object options for input: 'expanded_income' or 'c00100'(AGI) + specifies statistic used to place filing units in bins or deciles result_type : String object options for input: 'weighted_sum' or 'weighted_avg'; - determines how whether or not table entries are averages or totals + determines how the table statistics are computed - Typical usage - ------------- + Return and typical usage + ------------------------ dist1, dist2 = calc1.distribution_tables(calc2) OR dist1, _ = calc1.distribution_tables(None) (where calc1 is a baseline Calculator object - and calc2 is a reform Calculator object) + and calc2 is a reform Calculator object). + Each of the dist1 and optional dist2 is a distribution table as a + Pandas DataFrame with DIST_TABLE_COLUMNS and groupby rows. + NOTE: when groupby is 'weighted_deciles', the returned tables have 3 + extra rows containing top-decile detail consisting of statistics + for the 0.90-0.95 quantile range (bottom half of top decile), + for the 0.95-0.99 quantile range, and + for the 0.99-1.00 quantile range (top one percent); and the + returned table splits the bottom decile into filing units with + negative (denoted by a 0-10n row label), + zero (denoted by a 0-10z row label), and + positive (denoted by a 0-10p row label) values of the + specified income_measure. """ # nested function used only by this method def have_same_income_measure(calc1, calc2, income_measure): @@ -495,8 +501,6 @@ def difference_table(self, calc, in self and the income_measure in calc are different. Note that filing units are put into groupby categories using the specified income_measure in the baseline (self) situation. - Also, note that some subgroups may contain filing units with negative - or zero baseline (self) income. 
Parameters ---------- @@ -504,27 +508,35 @@ def difference_table(self, calc, calc represents the reform while self represents the baseline groupby : String object - options for input: 'weighted_deciles', 'webapp_income_bins', + options for input: 'weighted_deciles', 'standard_income_bins', 'large_income_bins', 'small_income_bins'; - determines how the columns in returned tables are sorted - NOTE: when groupby is 'weighted_deciles', the returned table has three - extra rows containing top-decile detail consisting of statistics - for the 0.90-0.95 quantile range (bottom half of top decile), - for the 0.95-0.99 quantile range, and - for the 0.99-1.00 quantile range (top one percent). + determines how the columns in resulting Pandas DataFrame are sorted income_measure : String object options for input: 'expanded_income' or 'c00100'(AGI) + specifies statistic used to place filing units in bins or deciles tax_to_diff : String object options for input: 'iitax', 'payrolltax', 'combined' specifies which tax to difference - Typical usage - ------------- + Returns and typical usage + ------------------------- diff = calc1.difference_table(calc2) (where calc1 is a baseline Calculator object - and calc2 is a reform Calculator object) + and calc2 is a reform Calculator object). + The returned diff is a difference table as a Pandas DataFrame + with DIFF_TABLE_COLUMNS and groupby rows. + NOTE: when groupby is 'weighted_deciles', the returned table has three + extra rows containing top-decile detail consisting of statistics + for the 0.90-0.95 quantile range (bottom half of top decile), + for the 0.95-0.99 quantile range, and + for the 0.99-1.00 quantile range (top one percent); and the + returned table splits the bottom decile into filing units with + negative (denoted by a 0-10n row label), + zero (denoted by a 0-10z row label), and + positive (denoted by a 0-10p row label) values of the + specified income_measure. 
""" assert isinstance(calc, Calculator) assert calc.current_year == self.current_year diff --git a/taxcalc/reforms/2017_law.out b/taxcalc/reforms/2017_law.out index aec8106ca..888e07855 100644 --- a/taxcalc/reforms/2017_law.out +++ b/taxcalc/reforms/2017_law.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 12995 16792 30213 3466 489 9180 54921 -5 0 0 0 0 0 0 0 0 -6 180000 30322 30375 119303 16514 13514 27540 152716 -7 0 0 0 0 0 0 0 0 -8 240000 25990 23584 190426 32344 30844 36720 190796 -9 600000 18875 20375 560750 142810 142810 66067 423437 +4 0 0 0 0 0 0 0 0 +5 60000 12995 16792 30213 3466 489 9180 54921 +6 0 0 0 0 0 0 0 0 +7 180000 30322 30375 119303 16514 13514 27540 152716 +8 0 0 0 0 0 0 0 0 +9 240000 25990 23584 190426 32344 30844 36720 190796 +10 600000 18875 20375 560750 142810 142810 66067 423437 sums 1080000 88182 91126 900692 195134 187657 139507 821869 \ No newline at end of file diff --git a/taxcalc/reforms/BrownKhanna.out b/taxcalc/reforms/BrownKhanna.out index 1bf6dc69b..1578e7cdc 100644 --- a/taxcalc/reforms/BrownKhanna.out +++ b/taxcalc/reforms/BrownKhanna.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 12995 16792 30213 3466 -4405 9180 59815 -5 0 0 0 0 0 0 0 0 -6 180000 30322 30375 119303 16514 8698 27540 157532 -7 0 0 0 0 0 0 0 0 -8 240000 25990 23584 190426 32344 30844 36720 190796 -9 600000 18875 20375 560750 142810 142810 66067 423437 +4 0 0 0 0 0 0 0 0 +5 60000 12995 16792 30213 3466 -4405 9180 59815 +6 0 0 0 0 0 0 0 0 +7 180000 30322 30375 119303 16514 8698 27540 157532 +8 0 0 0 0 0 0 0 0 +9 240000 25990 23584 190426 32344 30844 36720 190796 +10 600000 18875 20375 560750 142810 142810 66067 423437 sums 1080000 88182 91126 900692 195134 177947 139507 831580 \ No newline at end of file diff --git a/taxcalc/reforms/Clinton2016.out b/taxcalc/reforms/Clinton2016.out index edf7bb5eb..3351275db 100644 --- a/taxcalc/reforms/Clinton2016.out +++ b/taxcalc/reforms/Clinton2016.out 
@@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 12995 16792 30213 3466 -511 9180 55921 -5 0 0 0 0 0 0 0 0 -6 180000 30322 30375 119303 16514 11514 27540 154716 -7 0 0 0 0 0 0 0 0 -8 240000 25990 23584 190426 32344 29844 36720 191796 -9 600000 18875 20375 560750 142810 142810 66067 423437 +4 0 0 0 0 0 0 0 0 +5 60000 12995 16792 30213 3466 -511 9180 55921 +6 0 0 0 0 0 0 0 0 +7 180000 30322 30375 119303 16514 11514 27540 154716 +8 0 0 0 0 0 0 0 0 +9 240000 25990 23584 190426 32344 29844 36720 191796 +10 600000 18875 20375 560750 142810 142810 66067 423437 sums 1080000 88182 91126 900692 195134 183657 139507 825869 \ No newline at end of file diff --git a/taxcalc/reforms/Renacci.out b/taxcalc/reforms/Renacci.out index 2b2acc738..642ed4639 100644 --- a/taxcalc/reforms/Renacci.out +++ b/taxcalc/reforms/Renacci.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 16043 32087 11870 1187 -3245 9180 58655 -5 0 0 0 0 0 0 0 0 -6 180000 37434 64173 78393 7839 3676 27540 162554 -7 0 0 0 0 0 0 0 0 -8 240000 32087 48130 159784 21946 20446 36720 201194 -9 600000 23303 48130 528567 108077 108077 66067 458170 +4 0 0 0 0 0 0 0 0 +5 60000 16043 32087 11870 1187 -3245 9180 58655 +6 0 0 0 0 0 0 0 0 +7 180000 37434 64173 78393 7839 3676 27540 162554 +8 0 0 0 0 0 0 0 0 +9 240000 32087 48130 159784 21946 20446 36720 201194 +10 600000 23303 48130 528567 108077 108077 66067 458170 sums 1080000 108867 192519 778613 139049 128954 139507 880572 \ No newline at end of file diff --git a/taxcalc/reforms/RyanBrady.out b/taxcalc/reforms/RyanBrady.out index 8a99cecb9..675c645b9 100644 --- a/taxcalc/reforms/RyanBrady.out +++ b/taxcalc/reforms/RyanBrady.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 0 32087 27913 3350 83 9180 55327 -5 0 0 0 0 0 0 0 0 -6 180000 0 57756 122244 15524 11024 27540 155206 -7 0 0 0 0 0 0 0 0 -8 240000 0 44921 195079 31153 28153 36720 193487 -9 600000 0 38504 561496 135919 
135919 66067 430328 +4 0 0 0 0 0 0 0 0 +5 60000 0 32087 27913 3350 83 9180 55327 +6 0 0 0 0 0 0 0 0 +7 180000 0 57756 122244 15524 11024 27540 155206 +8 0 0 0 0 0 0 0 0 +9 240000 0 44921 195079 31153 28153 36720 193487 +10 600000 0 38504 561496 135919 135919 66067 430328 sums 1080000 0 173268 906732 185946 175179 139507 834347 \ No newline at end of file diff --git a/taxcalc/reforms/TCJA_House.out b/taxcalc/reforms/TCJA_House.out index aa2710b74..55a8d965c 100644 --- a/taxcalc/reforms/TCJA_House.out +++ b/taxcalc/reforms/TCJA_House.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 0 31227 28773 3453 -675 9180 56085 -5 0 0 0 0 0 0 0 0 -6 180000 0 56209 123791 14942 8942 27540 157288 -7 0 0 0 0 0 0 0 0 -8 240000 0 43718 196282 27758 23008 36720 198632 -9 600000 0 37473 562527 136297 134397 66067 431850 +4 0 0 0 0 0 0 0 0 +5 60000 0 31227 28773 3453 -675 9180 56085 +6 0 0 0 0 0 0 0 0 +7 180000 0 56209 123791 14942 8942 27540 157288 +8 0 0 0 0 0 0 0 0 +9 240000 0 43718 196282 27758 23008 36720 198632 +10 600000 0 37473 562527 136297 134397 66067 431850 sums 1080000 0 168627 911373 182449 165671 139507 843855 \ No newline at end of file diff --git a/taxcalc/reforms/TCJA_House_Amended.out b/taxcalc/reforms/TCJA_House_Amended.out index aa2710b74..55a8d965c 100644 --- a/taxcalc/reforms/TCJA_House_Amended.out +++ b/taxcalc/reforms/TCJA_House_Amended.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 0 31227 28773 3453 -675 9180 56085 -5 0 0 0 0 0 0 0 0 -6 180000 0 56209 123791 14942 8942 27540 157288 -7 0 0 0 0 0 0 0 0 -8 240000 0 43718 196282 27758 23008 36720 198632 -9 600000 0 37473 562527 136297 134397 66067 431850 +4 0 0 0 0 0 0 0 0 +5 60000 0 31227 28773 3453 -675 9180 56085 +6 0 0 0 0 0 0 0 0 +7 180000 0 56209 123791 14942 8942 27540 157288 +8 0 0 0 0 0 0 0 0 +9 240000 0 43718 196282 27758 23008 36720 198632 +10 600000 0 37473 562527 136297 134397 66067 431850 sums 1080000 0 168627 911373 182449 
165671 139507 843855 \ No newline at end of file diff --git a/taxcalc/reforms/TCJA_Reconciliation.out b/taxcalc/reforms/TCJA_Reconciliation.out index b6e477ecc..48acaf281 100644 --- a/taxcalc/reforms/TCJA_Reconciliation.out +++ b/taxcalc/reforms/TCJA_Reconciliation.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 0 31227 28773 3029 -898 9180 56308 -5 0 0 0 0 0 0 0 0 -6 180000 0 56209 123791 14699 8699 27540 157531 -7 0 0 0 0 0 0 0 0 -8 240000 0 43718 196282 29361 23361 36720 198279 -9 600000 0 37473 562527 136039 132039 66067 434207 +4 0 0 0 0 0 0 0 0 +5 60000 0 31227 28773 3029 -898 9180 56308 +6 0 0 0 0 0 0 0 0 +7 180000 0 56209 123791 14699 8699 27540 157531 +8 0 0 0 0 0 0 0 0 +9 240000 0 43718 196282 29361 23361 36720 198279 +10 600000 0 37473 562527 136039 132039 66067 434207 sums 1080000 0 168627 911373 183129 163202 139507 846325 \ No newline at end of file diff --git a/taxcalc/reforms/TCJA_Senate.out b/taxcalc/reforms/TCJA_Senate.out index 07f872684..487d33d19 100644 --- a/taxcalc/reforms/TCJA_Senate.out +++ b/taxcalc/reforms/TCJA_Senate.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 0 31227 28773 3029 -548 9180 55958 -5 0 0 0 0 0 0 0 0 -6 180000 0 56209 123791 14736 9786 27540 156444 -7 0 0 0 0 0 0 0 0 -8 240000 0 43718 196282 30333 25383 36720 196257 -9 600000 0 37473 562527 138938 135638 66067 430608 +4 0 0 0 0 0 0 0 0 +5 60000 0 31227 28773 3029 -548 9180 55958 +6 0 0 0 0 0 0 0 0 +7 180000 0 56209 123791 14736 9786 27540 156444 +8 0 0 0 0 0 0 0 0 +9 240000 0 43718 196282 30333 25383 36720 196257 +10 600000 0 37473 562527 138938 135638 66067 430608 sums 1080000 0 168627 911373 187036 170259 139507 839268 \ No newline at end of file diff --git a/taxcalc/reforms/TCJA_Senate_111417.out b/taxcalc/reforms/TCJA_Senate_111417.out index 0a586a32a..d99105a8f 100644 --- a/taxcalc/reforms/TCJA_Senate_111417.out +++ b/taxcalc/reforms/TCJA_Senate_111417.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 
0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 0 31227 28773 3029 -898 9180 56308 -5 0 0 0 0 0 0 0 0 -6 180000 0 56209 123791 14699 8699 27540 157531 -7 0 0 0 0 0 0 0 0 -8 240000 0 43718 196282 29622 23622 36720 198018 -9 600000 0 37473 562527 136612 132612 66067 433635 +4 0 0 0 0 0 0 0 0 +5 60000 0 31227 28773 3029 -898 9180 56308 +6 0 0 0 0 0 0 0 0 +7 180000 0 56209 123791 14699 8699 27540 157531 +8 0 0 0 0 0 0 0 0 +9 240000 0 43718 196282 29622 23622 36720 198018 +10 600000 0 37473 562527 136612 132612 66067 433635 sums 1080000 0 168627 911373 183962 164035 139507 845492 \ No newline at end of file diff --git a/taxcalc/reforms/TCJA_Senate_120117.out b/taxcalc/reforms/TCJA_Senate_120117.out index 0a586a32a..d99105a8f 100644 --- a/taxcalc/reforms/TCJA_Senate_120117.out +++ b/taxcalc/reforms/TCJA_Senate_120117.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 0 31227 28773 3029 -898 9180 56308 -5 0 0 0 0 0 0 0 0 -6 180000 0 56209 123791 14699 8699 27540 157531 -7 0 0 0 0 0 0 0 0 -8 240000 0 43718 196282 29622 23622 36720 198018 -9 600000 0 37473 562527 136612 132612 66067 433635 +4 0 0 0 0 0 0 0 0 +5 60000 0 31227 28773 3029 -898 9180 56308 +6 0 0 0 0 0 0 0 0 +7 180000 0 56209 123791 14699 8699 27540 157531 +8 0 0 0 0 0 0 0 0 +9 240000 0 43718 196282 29622 23622 36720 198018 +10 600000 0 37473 562527 136612 132612 66067 433635 sums 1080000 0 168627 911373 183962 164035 139507 845492 \ No newline at end of file diff --git a/taxcalc/reforms/Trump2016.out b/taxcalc/reforms/Trump2016.out index 7edda1134..78d70dcee 100644 --- a/taxcalc/reforms/Trump2016.out +++ b/taxcalc/reforms/Trump2016.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 0 32087 27913 3350 373 9180 55037 -5 0 0 0 0 0 0 0 0 -6 180000 0 64173 115827 14900 11900 27540 154330 -7 0 0 0 0 0 0 0 0 -8 240000 0 48130 191870 32325 30825 36720 190815 -9 600000 0 48130 551870 140216 140216 66067 426031 +4 0 0 0 0 0 0 0 0 +5 60000 0 32087 27913 3350 
373 9180 55037 +6 0 0 0 0 0 0 0 0 +7 180000 0 64173 115827 14900 11900 27540 154330 +8 0 0 0 0 0 0 0 0 +9 240000 0 48130 191870 32325 30825 36720 190815 +10 600000 0 48130 551870 140216 140216 66067 426031 sums 1080000 0 192519 887481 190791 183314 139507 826213 \ No newline at end of file diff --git a/taxcalc/reforms/Trump2017.out b/taxcalc/reforms/Trump2017.out index 8f34c5139..30b412459 100644 --- a/taxcalc/reforms/Trump2017.out +++ b/taxcalc/reforms/Trump2017.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 12995 33584 13421 1342 -1635 9180 57045 -5 0 0 0 0 0 0 0 0 -6 180000 30322 60751 88928 9117 6117 27540 160113 -7 0 0 0 0 0 0 0 0 -8 240000 25990 47167 166843 22235 20735 36720 200905 -9 600000 18875 40750 540375 116828 116828 66067 449418 +4 0 0 0 0 0 0 0 0 +5 60000 12995 33584 13421 1342 -1635 9180 57045 +6 0 0 0 0 0 0 0 0 +7 180000 30322 60751 88928 9117 6117 27540 160113 +8 0 0 0 0 0 0 0 0 +9 240000 25990 47167 166843 22235 20735 36720 200905 +10 600000 18875 40750 540375 116828 116828 66067 449418 sums 1080000 88182 182252 809566 149522 142045 139507 867481 \ No newline at end of file diff --git a/taxcalc/reforms/clp.out b/taxcalc/reforms/clp.out index b6e477ecc..48acaf281 100644 --- a/taxcalc/reforms/clp.out +++ b/taxcalc/reforms/clp.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 0 31227 28773 3029 -898 9180 56308 -5 0 0 0 0 0 0 0 0 -6 180000 0 56209 123791 14699 8699 27540 157531 -7 0 0 0 0 0 0 0 0 -8 240000 0 43718 196282 29361 23361 36720 198279 -9 600000 0 37473 562527 136039 132039 66067 434207 +4 0 0 0 0 0 0 0 0 +5 60000 0 31227 28773 3029 -898 9180 56308 +6 0 0 0 0 0 0 0 0 +7 180000 0 56209 123791 14699 8699 27540 157531 +8 0 0 0 0 0 0 0 0 +9 240000 0 43718 196282 29361 23361 36720 198279 +10 600000 0 37473 562527 136039 132039 66067 434207 sums 1080000 0 168627 911373 183129 163202 139507 846325 \ No newline at end of file diff --git a/taxcalc/reforms/ptaxes0.out 
b/taxcalc/reforms/ptaxes0.out index 14bdd0c42..1de1cd558 100644 --- a/taxcalc/reforms/ptaxes0.out +++ b/taxcalc/reforms/ptaxes0.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 12995 16792 30213 3466 489 10200 54411 -5 0 0 0 0 0 0 0 0 -6 180000 30322 30375 119303 16514 13514 30600 151186 -7 0 0 0 0 0 0 0 0 -8 240000 25990 23584 190426 32344 30844 40800 188756 -9 600000 18875 20375 560750 142810 142810 72761 420090 +4 0 0 0 0 0 0 0 0 +5 60000 12995 16792 30213 3466 489 10200 54411 +6 0 0 0 0 0 0 0 0 +7 180000 30322 30375 119303 16514 13514 30600 151186 +8 0 0 0 0 0 0 0 0 +9 240000 25990 23584 190426 32344 30844 40800 188756 +10 600000 18875 20375 560750 142810 142810 72761 420090 sums 1080000 88182 91126 900692 195134 187657 154361 814442 \ No newline at end of file diff --git a/taxcalc/reforms/ptaxes1.out b/taxcalc/reforms/ptaxes1.out index 0b712674a..d1d0f0187 100644 --- a/taxcalc/reforms/ptaxes1.out +++ b/taxcalc/reforms/ptaxes1.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 12995 16792 30213 3466 489 9180 54921 -5 0 0 0 0 0 0 0 0 -6 180000 30322 30375 119303 16514 13514 27540 152716 -7 0 0 0 0 0 0 0 0 -8 240000 25990 23584 190426 32344 30844 36720 190796 -9 600000 18875 20375 560750 142810 142810 79600 416670 +4 0 0 0 0 0 0 0 0 +5 60000 12995 16792 30213 3466 489 9180 54921 +6 0 0 0 0 0 0 0 0 +7 180000 30322 30375 119303 16514 13514 27540 152716 +8 0 0 0 0 0 0 0 0 +9 240000 25990 23584 190426 32344 30844 36720 190796 +10 600000 18875 20375 560750 142810 142810 79600 416670 sums 1080000 88182 91126 900692 195134 187657 153040 815103 \ No newline at end of file diff --git a/taxcalc/reforms/ptaxes2.out b/taxcalc/reforms/ptaxes2.out index 1828e0179..37aaa7da0 100644 --- a/taxcalc/reforms/ptaxes2.out +++ b/taxcalc/reforms/ptaxes2.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 12995 16792 30213 3466 489 9180 54921 -5 0 0 0 0 0 0 0 0 -6 180000 30322 30375 
119303 16514 13514 27540 152716 -7 0 0 0 0 0 0 0 0 -8 240000 25990 23584 190426 32344 30844 36720 190796 -9 600000 18875 20375 560750 142810 142810 93240 409850 +4 0 0 0 0 0 0 0 0 +5 60000 12995 16792 30213 3466 489 9180 54921 +6 0 0 0 0 0 0 0 0 +7 180000 30322 30375 119303 16514 13514 27540 152716 +8 0 0 0 0 0 0 0 0 +9 240000 25990 23584 190426 32344 30844 36720 190796 +10 600000 18875 20375 560750 142810 142810 93240 409850 sums 1080000 88182 91126 900692 195134 187657 166680 808283 \ No newline at end of file diff --git a/taxcalc/reforms/ptaxes3.out b/taxcalc/reforms/ptaxes3.out index 9e4ab3fd8..a475e5a00 100644 --- a/taxcalc/reforms/ptaxes3.out +++ b/taxcalc/reforms/ptaxes3.out @@ -3,10 +3,11 @@ 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 -4 60000 12995 16792 30213 3466 489 9180 54921 -5 0 0 0 0 0 0 0 0 -6 180000 30322 30375 119303 16514 13514 27540 152716 -7 0 0 0 0 0 0 0 0 -8 240000 25990 23584 190426 32344 30844 36720 190796 -9 600000 18875 20375 560750 142810 142810 65669 423834 +4 0 0 0 0 0 0 0 0 +5 60000 12995 16792 30213 3466 489 9180 54921 +6 0 0 0 0 0 0 0 0 +7 180000 30322 30375 119303 16514 13514 27540 152716 +8 0 0 0 0 0 0 0 0 +9 240000 25990 23584 190426 32344 30844 36720 190796 +10 600000 18875 20375 560750 142810 142810 65669 423834 sums 1080000 88182 91126 900692 195134 187657 139109 822267 \ No newline at end of file diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py index 337b3e351..c7300d8b2 100644 --- a/taxcalc/tests/test_utils.py +++ b/taxcalc/tests/test_utils.py @@ -21,6 +21,7 @@ DIST_TABLE_COLUMNS, DIST_TABLE_LABELS, DIFF_VARIABLES, DIFF_TABLE_COLUMNS, DIFF_TABLE_LABELS, + SMALL_INCOME_BINS, LARGE_INCOME_BINS, create_distribution_table, create_difference_table, weighted_count_lt_zero, weighted_count_gt_zero, weighted_count, weighted_sum, weighted_mean, @@ -87,6 +88,7 @@ def test_create_tables(cps_subsample): tax_to_diff='combined') assert isinstance(diff, pd.DataFrame) expected = [np.nan, + np.nan, -0.14, 
-0.58, -0.70, @@ -112,6 +114,7 @@ def test_create_tables(cps_subsample): tax_to_diff='iitax') assert isinstance(diff, pd.DataFrame) expected = [np.nan, + np.nan, -0.14, -0.58, -0.70, @@ -139,6 +142,7 @@ def test_create_tables(cps_subsample): tax_to_diff='iitax') assert isinstance(diff, pd.DataFrame) expected = [np.nan, + np.nan, -0.29, -0.07, -0.22, @@ -173,6 +177,7 @@ def test_create_tables(cps_subsample): tax_to_diff='combined') assert isinstance(diff, pd.DataFrame) expected = [0, + 0, 855188, 15425829, 26212078, @@ -195,6 +200,7 @@ def test_create_tables(cps_subsample): for val in diff[tabcol].values: print('{:.0f},'.format(val)) expected = [0.00, + 0.00, 0.15, 2.65, 4.51, @@ -217,6 +223,7 @@ def test_create_tables(cps_subsample): for val in diff[tabcol].values: print('{:.2f},'.format(val)) expected = [np.nan, + np.nan, -0.11, -0.62, -0.71, @@ -239,6 +246,7 @@ def test_create_tables(cps_subsample): for val in diff[tabcol].values: print('{:.2f},'.format(val)) expected = [np.nan, + np.nan, -0.11, -0.62, -0.71, @@ -269,6 +277,7 @@ def test_create_tables(cps_subsample): result_type='weighted_sum') assert isinstance(dist, pd.DataFrame) expected = [0, + 0, -56140397, -67237556, -58897159, @@ -291,6 +300,7 @@ def test_create_tables(cps_subsample): for val in dist[tabcol].values: print('{:.0f},'.format(val)) expected = [0, + 0, 1202, 13981, 21932, @@ -313,6 +323,7 @@ def test_create_tables(cps_subsample): for val in dist[tabcol].values: print('{:.0f},'.format(val)) expected = [0, + 0, 812766585, 2639118220, 3940557055, @@ -335,6 +346,7 @@ def test_create_tables(cps_subsample): for val in dist[tabcol].values: print('{:.0f},'.format(val)) expected = [0, + 0, 801755209, 2466382489, 3674186760, @@ -363,6 +375,7 @@ def test_create_tables(cps_subsample): result_type='weighted_sum') assert isinstance(dist, pd.DataFrame) expected = [0, + 0, -44670465, -79534586, -61791623, @@ -383,6 +396,7 @@ def test_create_tables(cps_subsample): for val in dist[tabcol].values: 
print('{:.0f},'.format(val)) expected = [0, + 0, 1202, 13625, 27355, @@ -638,8 +652,7 @@ def test_weighted_perc_cut(): def test_add_income_bins(): dta = np.arange(1, 1e6, 5000) dfx = pd.DataFrame(data=dta, columns=['expanded_income']) - bins = [-9e99, 0, 9999, 19999, 29999, 39999, 49999, 74999, 99999, - 200000, 9e99] + bins = LARGE_INCOME_BINS dfr = add_income_bins(dfx, 'expanded_income', bin_type='tpc', bins=None, right=True) groupedr = dfr.groupby('bins') @@ -661,9 +674,8 @@ def test_add_income_bins(): def test_add_income_bins_soi(): dta = np.arange(1, 1e6, 5000) dfx = pd.DataFrame(data=dta, columns=['expanded_income']) - bins = [-9e99, 0, 4999, 9999, 14999, 19999, 24999, 29999, 39999, - 49999, 74999, 99999, 199999, 499999, 999999, 1499999, - 1999999, 4999999, 9999999, 9e99] + + bins = SMALL_INCOME_BINS dfr = add_income_bins(dfx, 'expanded_income', bin_type='soi', right=True) groupedr = dfr.groupby('bins') idx = 1 diff --git a/taxcalc/utils.py b/taxcalc/utils.py index aee2821d7..604fc5b45 100644 --- a/taxcalc/utils.py +++ b/taxcalc/utils.py @@ -122,24 +122,24 @@ 'Consumption Value of Benefits', '% Change in After-Tax Income'] -DECILE_ROW_NAMES = ['0-10zn', '0-10p', '10-20', '20-30', '30-40', '40-50', +DECILE_ROW_NAMES = ['0-10n', '0-10z', '0-10p', + '10-20', '20-30', '30-40', '40-50', '50-60', '60-70', '70-80', '80-90', '90-100', 'ALL', '90-95', '95-99', 'Top 1%'] -STANDARD_ROW_NAMES = ['<=$0K', '$0-10K', '$10-20K', '$20-30K', '$30-40K', - '$40-50K', '$50-75K', '$75-100K', - '$100-200K', '$200-500K', - '$500-1000K', '>$1000K', 'ALL'] +STANDARD_ROW_NAMES = ['<$0K', '=$0K', '$0-10K', '$10-20K', '$20-30K', + '$30-40K', '$40-50K', '$50-75K', '$75-100K', + '$100-200K', '$200-500K', '$500-1000K', '>$1000K', 'ALL'] -STANDARD_INCOME_BINS = [-9e99, 1e-9, 9999, 19999, 29999, 39999, 49999, +STANDARD_INCOME_BINS = [-9e99, -1e-9, 1e-9, 9999, 19999, 29999, 39999, 49999, 74999, 99999, 199999, 499999, 1000000, 9e99] -LARGE_INCOME_BINS = [-9e99, 1e-9, 9999, 19999, 29999, 39999, 
49999, +LARGE_INCOME_BINS = [-9e99, -1e-9, 1e-9, 9999, 19999, 29999, 39999, 49999, 74999, 99999, 200000, 9e99] -SMALL_INCOME_BINS = [-9e99, 1e-9, 4999, 9999, 14999, 19999, 24999, 29999, - 39999, 49999, 74999, 99999, 199999, 499999, 999999, +SMALL_INCOME_BINS = [-9e99, -1e-9, 1e-9, 4999, 9999, 14999, 19999, 24999, + 29999, 39999, 49999, 74999, 99999, 199999, 499999, 999999, 1499999, 1999999, 4999999, 9999999, 9e99] @@ -262,22 +262,14 @@ def create_distribution_table(vdf, groupby, income_measure, result_type): options for input: 'weighted_deciles', 'standard_income_bins', 'large_income_bins', 'small_income_bins'; determines how the columns in the resulting Pandas DataFrame are sorted - NOTE: when groupby is 'weighted_deciles', the returned table has three - extra rows containing top-decile detail consisting of statistics - for the 0.90-0.95 quantile range (bottom half of top decile), - for the 0.95-0.99 quantile range, and - for the 0.99-1.00 quantile range (top one percent); and the returned - table may have a fourth extra row that shows bottom-decile detail - with the bottom decile split into filing units with non-positive and - positive values of the specified income_measure variable. 
result_type : String object options for input: 'weighted_sum' or 'weighted_avg'; - determines how the data should be manipulated + determines how the table statistics are computed income_measure : String object - options for input: 'expanded_income', 'c00100'(AGI), - 'expanded_income_baseline', 'c00100_baseline' + options for input: 'expanded_income', 'c00100'(AGI) + specifies statistic used to place filing units in bins or deciles Notes ----- @@ -293,10 +285,18 @@ def create_distribution_table(vdf, groupby, income_measure, result_type): Returns ------- - distribution table as a Pandas DataFrame, with DIST_TABLE_COLUMNS and - groupby rows, where the rows run from lowest bin/decile to the highest - followed by a sums row with the top-decile detail in an additional three - rows following the sums row + distribution table as a Pandas DataFrame with DIST_TABLE_COLUMNS and + groupby rows. + NOTE: when groupby is 'weighted_deciles', the returned table has three + extra rows containing top-decile detail consisting of statistics + for the 0.90-0.95 quantile range (bottom half of top decile), + for the 0.95-0.99 quantile range, and + for the 0.99-1.00 quantile range (top one percent); and the + returned table splits the bottom decile into filing units with + negative (denoted by a 0-10n row label), + zero (denoted by a 0-10z row label), and + positive (denoted by a 0-10p row label) values of the + specified income_measure. 
""" # pylint: disable=too-many-statements,too-many-locals,too-many-branches # nested function that specifies calculated columns @@ -372,8 +372,8 @@ def stat_dataframe(gpdf): pdf = gpdf.get_group(1) # bottom decile as its own DataFrame pdf = copy.deepcopy(pdf) # eliminates Pandas warning in pd.cut() pdf['bins'] = pd.cut(pdf[income_measure], - bins=[-9e99, 1e-9, 9e99], - labels=[1, 2]) + bins=[-9e99, -1e-9, 1e-9, 9e99], + labels=[1, 2, 3]) gpdfx = pdf.groupby('bins', as_index=False) rows = stat_dataframe(gpdfx) dist_table = pd.concat([rows, dist_table.iloc[1:11]]) @@ -429,20 +429,12 @@ def create_difference_table(vdf1, vdf2, groupby, income_measure, tax_to_diff): groupby : String object options for input: 'weighted_deciles', 'standard_income_bins', - 'large_income_bins', 'small_income_bins' - specifies kind of bins used to group filing units - NOTE: when groupby is 'weighted_deciles', the returned table has three - extra rows containing top-decile detail consisting of statistics - for the 0.90-0.95 quantile range (bottom half of top decile), - for the 0.95-0.99 quantile range, and - for the 0.99-1.00 quantile range (top one percent); and the returned - table may have a fourth extra row that shows bottom-decile detail - with the bottom decile split into filing units with non-positive and - positive values of the specified income_measure variable. 
+ 'large_income_bins', 'small_income_bins'; + determines how the columns in the resulting Pandas DataFrame are sorted income_measure : String object options for input: 'expanded_income', 'c00100'(AGI) - specifies statistic to place filing units in bins + specifies statistic used to place filing units in bins or deciles tax_to_diff : String object options for input: 'iitax', 'payrolltax', 'combined' @@ -450,10 +442,18 @@ def create_difference_table(vdf1, vdf2, groupby, income_measure, tax_to_diff): Returns ------- - difference table as a Pandas DataFrame, with DIFF_TABLE_COLUMNS and - groupby rows, where the rows run from lowest bin/decile to the highest - followed by a sums row with the top-decile detail in an additional three - rows following the sums row + difference table as a Pandas DataFrame with DIFF_TABLE_COLUMNS and + groupby rows. + NOTE: when groupby is 'weighted_deciles', the returned table has three + extra rows containing top-decile detail consisting of statistics + for the 0.90-0.95 quantile range (bottom half of top decile), + for the 0.95-0.99 quantile range, and + for the 0.99-1.00 quantile range (top one percent); and the + returned table splits the bottom decile into filing units with + negative (denoted by a 0-10n row label), + zero (denoted by a 0-10z row label), and + positive (denoted by a 0-10p row label) values of the + specified income_measure. """ # pylint: disable=too-many-statements # nested function that actually creates the difference table @@ -532,8 +532,8 @@ def weighted_share_of_total(gpdf, colname, total): pdf = gpdf.get_group(1) # bottom decile as its own DataFrame pdf = copy.deepcopy(pdf) # eliminates Pandas warning in pd.cut() pdf['bins'] = pd.cut(pdf[income_measure], - bins=[-9e99, 1e-9, 9e99], - labels=[1, 2]) + bins=[-9e99, -1e-9, 1e-9, 9e99], + labels=[1, 2, 3]) gpdfx = pdf.groupby('bins', as_index=False) rows = stat_dataframe(gpdfx) diffs = pd.concat([rows, diffs.iloc[1:11]])