
Commit 264a07f
Remove duplicates between all_columns and extra_columns, add unit tests.
ilectra committed Apr 11, 2024
1 parent 8906026 commit 264a07f
Showing 2 changed files with 98 additions and 16 deletions.
7 changes: 7 additions & 0 deletions post-processing/config_handler.py
@@ -154,6 +154,13 @@ def parse_columns(self):
            dict.fromkeys((self.plot_columns + self.filter_columns +
                           ([self.scaling_column.get("name")] if self.scaling_column else []))))

+       # remove duplicated columns from the extra_columns list
+       duplicates = set(self.all_columns) & set(self.extra_columns)
+       while len(duplicates) != 0:
+           for d in duplicates:
+               self.extra_columns.remove(d)
+           duplicates = set(self.all_columns) & set(self.extra_columns)
+
    def remove_redundant_types(self):
        """
        Check for columns that are no longer in use and remove them from the type dict.
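Why the removal loops instead of making a single pass: list.remove() deletes only the first matching element, so repeated entries in extra_columns (the case exercised by the new duplicated-columns test below) need repeated sweeps until the intersection is empty. A minimal standalone sketch of the same logic, with illustrative column names:

    # illustrative names, not taken from the repository's test data
    all_columns = ["tasks", "flops_value", "flops_unit"]
    extra_columns = ["spack_spec", "tasks", "tasks"]  # "tasks" requested twice

    duplicates = set(all_columns) & set(extra_columns)
    while len(duplicates) != 0:
        for d in duplicates:
            extra_columns.remove(d)  # removes the first occurrence only
        duplicates = set(all_columns) & set(extra_columns)

    print(extra_columns)  # ['spack_spec'] -- both "tasks" entries are gone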
107 changes: 91 additions & 16 deletions post-processing/test_post_processing.py
@@ -236,7 +236,8 @@ def test_high_level_script(run_sombrero):
                 "series": [],
                 "column_types": {"fake_column": "int",
                                  "flops_value": "float",
-                                 "flops_unit": "str"}}))
+                                 "flops_unit": "str"},
+                "additional_columns_to_csv": []}))
    except KeyError as e:
        assert e.args[1] == ["fake_column"]
    else:
@@ -256,7 +257,8 @@ def test_high_level_script(run_sombrero):
                 "series": [],
                 "column_types": {"tasks": "int",
                                  "flops_value": "float",
-                                 "flops_unit": "str"}}))
+                                 "flops_unit": "str"},
+                "additional_columns_to_csv": []}))
    except KeyError as e:
        assert e.args[1] == "!!"
    else:
@@ -276,7 +278,8 @@ def test_high_level_script(run_sombrero):
                 "series": [],
                 "column_types": {"tasks": "int",
                                  "flops_value": "float",
-                                 "flops_unit": "str"}}))
+                                 "flops_unit": "str"},
+                "additional_columns_to_csv": []}))
    except ValueError:
        assert True
    else:
@@ -296,7 +299,8 @@ def test_high_level_script(run_sombrero):
                 "series": [],
                 "column_types": {"tasks": "int",
                                  "flops_value": "float",
-                                 "flops_unit": "str"}}))
+                                 "flops_unit": "str"},
+                "additional_columns_to_csv": []}))
    except pd.errors.EmptyDataError:
        assert True
    else:
@@ -315,7 +319,8 @@ def test_high_level_script(run_sombrero):
                 "series": [],
                 "column_types": {"tasks": "int",
                                  "flops_value": "float",
-                                 "flops_unit": "str"}}))
+                                 "flops_unit": "str"},
+                "additional_columns_to_csv": []}))
    except RuntimeError:
        assert True
    else:
@@ -334,7 +339,8 @@ def test_high_level_script(run_sombrero):
                 "series": [],
                 "column_types": {"tasks": "int",
                                  "cpus_per_task": "int",
-                                 "extra_param": "int"}}))
+                                 "extra_param": "int"},
+                "additional_columns_to_csv": []}))
    except RuntimeError as e:
        # three param columns found in changed log
        EXPECTED_FIELDS = ["tasks", "cpus_per_task", "extra_param"]
@@ -356,7 +362,8 @@ def test_high_level_script(run_sombrero):
             "series": [],
             "column_types": {"job_completion_time": "datetime",
                              "flops_value": "float",
-                             "flops_unit": "str"}}))
+                             "flops_unit": "str"},
+            "additional_columns_to_csv": []}))
    # check returned subset is as expected
    assert len(df) == 2

@@ -374,7 +381,8 @@ def test_high_level_script(run_sombrero):
             "column_types": {"tasks": "int",
                              "cpus_per_task": "int",
                              "flops_value": "float",
-                             "flops_unit": "str"}}))
+                             "flops_unit": "str"},
+            "additional_columns_to_csv": []}))
    # check returned subset is as expected
    assert len(df) == 4

@@ -394,7 +402,8 @@ def test_high_level_script(run_sombrero):
                              "flops_value": "float",
                              "flops_unit": "str",
                              "cpus_per_task": "int",
-                             "OMP_NUM_THREADS": "int"}}))
+                             "OMP_NUM_THREADS": "int"},
+            "additional_columns_to_csv": []}))
    # check flops values are halved compared to previous df
    assert (dfs["flops_value"].values == df[df["cpus_per_task"] == 2]["flops_value"].values/2).all()

@@ -413,7 +422,8 @@ def test_high_level_script(run_sombrero):
             "column_types": {"tasks": "int",
                              "flops_value": "float",
                              "flops_unit": "str",
-                             "cpus_per_task": "int"}}))
+                             "cpus_per_task": "int"},
+            "additional_columns_to_csv": []}))
    assert (dfs[dfs["cpus_per_task"] == 1]["flops_value"].values ==
            df[df["cpus_per_task"] == 1]["flops_value"].values /
            df[df["cpus_per_task"] == 1]["flops_value"].values).all()
@@ -437,7 +447,8 @@ def test_high_level_script(run_sombrero):
             "column_types": {"tasks": "int",
                              "flops_value": "float",
                              "flops_unit": "str",
-                             "cpus_per_task": "int"}}))
+                             "cpus_per_task": "int"},
+            "additional_columns_to_csv": []}))
    assert (dfs["flops_value"].values == df["flops_value"].values /
            df[(df["cpus_per_task"] == 1) & (df["tasks"] == 2)]["flops_value"].iloc[0]).all()

@@ -456,7 +467,8 @@ def test_high_level_script(run_sombrero):
             "column_types": {"tasks": "int",
                              "flops_value": "float",
                              "flops_unit": "str",
-                             "cpus_per_task": "int"}}))
+                             "cpus_per_task": "int"},
+            "additional_columns_to_csv": []}))
    # check flops values are halved compared to previous df
    assert (dfs["flops_value"].values == df[df["cpus_per_task"] == 2]["flops_value"].values/2).all()

@@ -476,7 +488,8 @@ def test_high_level_script(run_sombrero):
                                  "flops_value": "float",
                                  "flops_unit": "str",
                                  "cpus_per_task": "int",
-                                 "OMP_NUM_THREADS": "str"}}))
+                                 "OMP_NUM_THREADS": "str"},
+                "additional_columns_to_csv": []}))
    except TypeError:
        assert True

@@ -496,7 +509,8 @@ def test_high_level_script(run_sombrero):
                 "column_types": {"tasks": "int",
                                  "flops_value": "float",
                                  "flops_unit": "str",
-                                 "cpus_per_task": "int"}}))
+                                 "cpus_per_task": "int"},
+                "additional_columns_to_csv": []}))
    except ValueError:
        assert True

@@ -514,7 +528,8 @@ def test_high_level_script(run_sombrero):
                 "series": [],
                 "column_types": {"tasks": "int",
                                  "flops_value": "float",
-                                 "flops_unit": "str"}}))
+                                 "flops_unit": "str"},
+                "additional_columns_to_csv": []}))
    except RuntimeError as e:
        # dataframe has records from both files
        assert len(e.args[1]) == 8
@@ -535,9 +550,69 @@ def test_high_level_script(run_sombrero):
             "column_types": {"tasks": "int",
                              "flops_value": "float",
                              "flops_unit": "str",
-                             "cpus_per_task": "int"}}))
+                             "cpus_per_task": "int"},
+            "additional_columns_to_csv": []}))

    EXPECTED_FIELDS = ["tasks", "flops_value", "flops_unit"]
    # check returned subset is as expected
    assert df.columns.tolist() == EXPECTED_FIELDS
    assert len(df) == 1
+
+    # get filtered dataframe with extra columns for csv
+    df = PostProcessing(sombrero_log_path, save=True).run_post_processing(
+        ConfigHandler(
+            {"title": "Title",
+             "x_axis": {"value": "tasks",
+                        "units": {"custom": None}},
+             "y_axis": {"value": "flops_value",
+                        "units": {"column": "flops_unit"}},
+             "filters": {"and": [["tasks", ">", 1], ["cpus_per_task", "==", 2]],
+                         "or": []},
+             "series": [],
+             "column_types": {"tasks": "int",
+                              "flops_value": "float",
+                              "flops_unit": "str",
+                              "cpus_per_task": "int"},
+             "additional_columns_to_csv": ["spack_spec"]}
+        ))
+
+    EXPECTED_FIELDS = ["tasks", "flops_value", "flops_unit"]
+    # check returned subset is as expected
+    assert df.columns.tolist() == EXPECTED_FIELDS
+    assert len(df) == 1
+
+    EXPECTED_FIELDS.append("spack_spec")
+    # check subset written to csv is as expected
+    output_file = "output.csv"
+    df_saved = pd.read_csv(output_file, index_col=0)
+    assert df_saved.columns.tolist() == EXPECTED_FIELDS
+    assert len(df_saved) == 1
+
+    # get filtered dataframe with duplicated extra columns for csv
+    df = PostProcessing(sombrero_log_path, save=True).run_post_processing(
+        ConfigHandler(
+            {"title": "Title",
+             "x_axis": {"value": "tasks",
+                        "units": {"custom": None}},
+             "y_axis": {"value": "flops_value",
+                        "units": {"column": "flops_unit"}},
+             "filters": {"and": [["tasks", ">", 1], ["cpus_per_task", "==", 2]],
+                         "or": []},
+             "series": [],
+             "column_types": {"tasks": "int",
+                              "flops_value": "float",
+                              "flops_unit": "str",
+                              "cpus_per_task": "int"},
+             "additional_columns_to_csv": ["tasks", "tasks"]}
+        ))
+
+    EXPECTED_FIELDS = ["tasks", "flops_value", "flops_unit"]
+    # check returned subset is as expected
+    assert df.columns.tolist() == EXPECTED_FIELDS
+    assert len(df) == 1
+
+    # check subset written to csv is as expected
+    output_file = "output.csv"
+    df_saved = pd.read_csv(output_file, index_col=0)
+    assert df_saved.columns.tolist() == EXPECTED_FIELDS
+    assert len(df_saved) == 1
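Taken together, the new tests pin down the behaviour of additional_columns_to_csv: extra columns appear only in the saved CSV, never in the returned dataframe, and anything duplicated or already selected for plotting/filtering is dropped by parse_columns(). A usage sketch mirroring the calls above (the import paths and log path are assumptions, not confirmed by this commit):

    import pandas as pd

    from config_handler import ConfigHandler      # assumed import path
    from post_processing import PostProcessing    # assumed import path

    sombrero_log_path = "path/to/sombrero/perflogs"  # placeholder

    config = ConfigHandler(
        {"title": "Title",
         "x_axis": {"value": "tasks", "units": {"custom": None}},
         "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}},
         "filters": {"and": [], "or": []},
         "series": [],
         "column_types": {"tasks": "int",
                          "flops_value": "float",
                          "flops_unit": "str"},
         # "tasks" is already a plot column, so parse_columns() drops it here;
         # only "spack_spec" survives into the saved CSV
         "additional_columns_to_csv": ["spack_spec", "tasks"]})

    # save=True writes the filtered subset, plus surviving extra columns, to output.csv
    df = PostProcessing(sombrero_log_path, save=True).run_post_processing(config)
    print(pd.read_csv("output.csv", index_col=0).columns.tolist())
    # expected: ['tasks', 'flops_value', 'flops_unit', 'spack_spec']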
