Merge pull request #1795 from martinholmer/revise-nonsmall-diffs

Revise nonsmall_diffs utility function and use it in unit tests
PSLmodels · Dec 23, 2017 · 404270a
2 parents 37ab67c + 2d609e7
commit 404270a
Show file tree

Hide file tree

Showing 7 changed files with 38 additions and 33 deletions.
diff --git a/taxcalc/reforms/earnings_shifting.py b/taxcalc/reforms/earnings_shifting.py
@@ -28,7 +28,6 @@
 """
 # CODING-STYLE CHECKS:
 # pep8 --ignore=E402 earnings_shifting.py
-# pylint --disable=locally-disabled earnings_shifting.py
 
 import sys
 import argparse

diff --git a/taxcalc/tests/test_cpscsv.py b/taxcalc/tests/test_cpscsv.py
@@ -39,19 +39,18 @@ def test_agg(tests_path):
     taxes_fullsample = adt.loc["Combined Liability ($b)"]
     # convert adt to a string with a trailing EOL character
     actual_results = adt.to_string() + '\n'
-    act = actual_results.splitlines(True)
     # read expected results from file
     aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
     with open(aggres_path, 'r') as expected_file:
         txt = expected_file.read()
     expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
-    exp = expected_results.splitlines(True)
-    # ensure act and exp line lists have differences no more than small value
+    # ensure actual and expected results have no nonsmall differences
     if sys.version_info.major == 2:
-        small = 1e-6  # tighter test for Python 2.7
+        small = 0.0  # tighter test for Python 2.7
     else:
-        small = 0.1 + 1e-6  # looser test for Python 3.6
-    diffs = nonsmall_diffs(act, exp, small)
+        small = 0.1  # looser test for Python 3.6
+    diffs = nonsmall_diffs(actual_results.splitlines(True),
+                           expected_results.splitlines(True), small)
     if diffs:
         new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
         with open(new_filename, 'w') as new_file:

diff --git a/taxcalc/tests/test_puf_var_stats.py b/taxcalc/tests/test_puf_var_stats.py
@@ -6,6 +6,7 @@
 # pylint --disable=locally-disabled test_puf_var_stats.py
 
 import os
+import sys
 import json
 import copy
 import numpy as np
@@ -93,7 +94,7 @@ def calculate_mean_stats(calc, table, year):
     table[str(year)] = means
 
 
-def differences(new_filename, old_filename, stat_kind, small=0.0):
+def differences(new_filename, old_filename, stat_kind, small):
     """
     Return message string if there are differences at least as large as small;
     otherwise (i.e., if there are only small differences) return empty string.
@@ -102,9 +103,8 @@ def differences(new_filename, old_filename, stat_kind, small=0.0):
         new_text = vfile.read()
     with open(old_filename, 'r') as vfile:
         old_text = vfile.read()
-    new = new_text.splitlines(True)
-    old = old_text.splitlines(True)
-    if nonsmall_diffs(new, old, small):
+    if nonsmall_diffs(new_text.splitlines(True),
+                      old_text.splitlines(True), small):
         new_name = os.path.basename(new_filename)
         old_name = os.path.basename(old_filename)
         msg = '{} RESULTS DIFFER:\n'.format(stat_kind)
@@ -158,10 +158,18 @@ def test_puf_var_stats(tests_path, puf_fullsample):
     table_corr.sort_index(inplace=True)
     table_corr.to_csv(corr_path, float_format='%8.2f',
                       columns=table_corr.index)
-    # compare new and old CSV files for differences no larger than small value
-    mean_msg = differences(mean_path, mean_path[:-4],
-                           'MEAN', small=1.000001)
-    corr_msg = differences(corr_path, corr_path[:-4],
-                           'CORR', small=0.010001)
+    # compare new and old CSV files for nonsmall differences
+    if sys.version_info.major == 2:
+        # tighter tests for Python 2.7
+        mean_msg = differences(mean_path, mean_path[:-4],
+                               'MEAN', small=0.0)
+        corr_msg = differences(corr_path, corr_path[:-4],
+                               'CORR', small=0.0)
+    else:
+        # looser tests for Python 3.6
+        mean_msg = differences(mean_path, mean_path[:-4],
+                               'MEAN', small=1.0)
+        corr_msg = differences(corr_path, corr_path[:-4],
+                               'CORR', small=0.01)
     if mean_msg or corr_msg:
         raise ValueError(mean_msg + corr_msg)
diff --git a/taxcalc/tests/test_pufcsv.py b/taxcalc/tests/test_pufcsv.py
@@ -54,9 +54,9 @@ def test_agg(tests_path, puf_fullsample):
     expect = expected_results.splitlines(True)
     # ensure actual and expect lines have differences no more than small value
     if sys.version_info.major == 2:
-        small = 1e-6  # tighter test for Python 2.7
+        small = 0.0  # tighter test for Python 2.7
     else:
-        small = 0.1 + 1e-6  # looser test for Python 3.6
+        small = 0.1  # looser test for Python 3.6
     diffs = nonsmall_diffs(actual, expect, small)
     if diffs:
         new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
@@ -213,13 +213,11 @@ def test_mtr(tests_path, puf_path):
         res += mtr_bin_counts(mtr_ptax, PTAX_MTR_BIN_EDGES, recid)
         res += mtr_bin_counts(mtr_itax, ITAX_MTR_BIN_EDGES, recid)
     # check for differences between actual and expected results
-    actual = res.splitlines(True)
     mtrres_path = os.path.join(tests_path, 'pufcsv_mtr_expect.txt')
     with open(mtrres_path, 'r') as expected_file:
         txt = expected_file.read()
     expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
-    expected = expected_results.splitlines(True)
-    if nonsmall_diffs(actual, expected, small=1e-6):
+    if nonsmall_diffs(res.splitlines(True), expected_results.splitlines(True)):
         new_filename = '{}{}'.format(mtrres_path[:-10], 'actual.txt')
         with open(new_filename, 'w') as new_file:
             new_file.write(res)

diff --git a/taxcalc/tests/test_reforms.py b/taxcalc/tests/test_reforms.py
@@ -61,18 +61,16 @@ def write_distribution_table(calc, resfilename):
     # embedded function used only in test_reform_json_and_output
     def res_and_out_are_same(base):
         """
-        Return true if base.res and base.out file contents are the same;
-        return false if base.res and base.out file contents differ.
+        Return True if base.res and base.out file contents are the same;
+        return False if base.res and base.out file contents differ.
         """
-        with open(base + '.out') as outfile:
-            exp_res = outfile.read()
-        exp = exp_res.splitlines(True)
         with open(base + '.res') as resfile:
             act_res = resfile.read()
-        act = act_res.splitlines(True)
-        # check that act & exp have differences no more than small value
-        diffs = nonsmall_diffs(act, exp, small=1e-6)
-        return not diffs
+        with open(base + '.out') as outfile:
+            exp_res = outfile.read()
+        # check to see if act_res & exp_res have differences
+        return not nonsmall_diffs(act_res.splitlines(True),
+                                  exp_res.splitlines(True))
     # specify Records object containing cases data
     tax_year = 2020
     cases_path = os.path.join(tests_path, '..', 'reforms', 'cases.csv')

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
@@ -926,5 +926,5 @@ def test_nonsmall_diffs():
     assert nonsmall_diffs(['AaA'], ['AAA'])
     assert not nonsmall_diffs(['AAA'], ['AAA'])
     assert nonsmall_diffs(['12.3'], ['12.2'])
-    assert not nonsmall_diffs(['12.3'], ['12.2'], small=0.10001)
+    assert not nonsmall_diffs(['12.3'], ['12.2'], small=0.1)
     assert nonsmall_diffs(['12.3'], ['AAA'])
diff --git a/taxcalc/utils.py b/taxcalc/utils.py
@@ -1456,7 +1456,7 @@ def nonsmall_diffs(linelist1, linelist2, small=0.0):
     linelist1 and linelist2) by more than the small amount.
     NOTE: this function is meant to be used only in the unit tests to handle
     small differences in floating point values generated by Python 2.7 and 3.6,
-    where the small amount if used only under Python 3.6.
+    where a nonzero small amount is used only under Python 3.6.
     """
     # embedded function used only in nonsmall_diffs function
     def isfloat(value):
@@ -1473,6 +1473,9 @@ def isfloat(value):
     assert isinstance(linelist2, list)
     if len(linelist1) != len(linelist2):
         return True
+    assert small >= 0.0 and small <= 1.0
+    epsilon = 1e-6
+    smallamt = small + epsilon
     for line1, line2 in zip(linelist1, linelist2):
         if line1 == line2:
             continue
@@ -1483,7 +1486,7 @@ def isfloat(value):
                 tok1_isfloat = isfloat(tok1)
                 tok2_isfloat = isfloat(tok2)
                 if tok1_isfloat and tok2_isfloat:
-                    if abs(float(tok1) - float(tok2)) < small:
+                    if abs(float(tok1) - float(tok2)) <= smallamt:
                         continue
                     else:
                         return True