def compare(left, right, left_label='left', right_label='right',
            drop_close=True, **kwargs):
    """Compare the data in two IamDataFrames and return a pd.DataFrame

    Parameters
    ----------
    left, right: IamDataFrames
        the IamDataFrames to be compared
    left_label, right_label: str, default `left`, `right`
        column names of the returned dataframe (must differ from each other)
    drop_close: bool, default True
        remove all data where `left` and `right` are close
    kwargs: passed to `np.isclose()`

    Returns
    -------
    pd.DataFrame
        one row per index entry found in `left` or `right`, with the columns
        ordered as `[right_label, left_label]`; an entry that exists on only
        one side is NaN in the other column

    Raises
    ------
    ValueError
        if `left_label` and `right_label` are identical
    """
    # identical labels would collapse into a single dict key below, so the
    # function would silently compare one side with itself
    if left_label == right_label:
        raise ValueError(
            '`left_label` and `right_label` must be different, '
            'found `{}` for both'.format(left_label))

    # align both timeseries side by side on their long-format index;
    # the default outer join keeps entries that exist on only one side
    ret = pd.concat({right_label: right.data.set_index(right._LONG_IDX),
                     left_label: left.data.set_index(left._LONG_IDX)}, axis=1)
    # drop the inner column level so that only the two labels remain
    ret.columns = ret.columns.droplevel(1)
    if drop_close:
        # NaN is never close to a number, so one-sided entries are retained
        # (unless the caller passes `equal_nan=True` and both sides are NaN)
        ret = ret[~np.isclose(ret[left_label], ret[right_label], **kwargs)]
    return ret[[right_label, left_label]]
import copy
import numpy as np
import pandas as pd
from pyam import compare, IAMC_IDX


def test_compare(meta_df):
    """Check that `compare()` reports changed values and renamed variables"""
    # build a modified copy: change the first value, rename the coal variable
    other = copy.deepcopy(meta_df)
    value_col = other.data.columns.get_loc('value')
    other.data.iloc[0, value_col] = 2
    renaming = {'variable': {'Primary Energy|Coal': 'Primary Energy|Gas'}}
    other.rename(renaming, inplace=True)

    obs = compare(meta_df, other, right_label='meta_df', left_label='clone')

    # expected rows: (variable, unit, year, <right_label>, <left_label>)
    rows = [
        ['Primary Energy', 'EJ/y', 2005, 2, 1],
        ['Primary Energy|Coal', 'EJ/y', 2005, np.nan, 0.5],
        ['Primary Energy|Coal', 'EJ/y', 2010, np.nan, 3],
        ['Primary Energy|Gas', 'EJ/y', 2005, 0.5, np.nan],
        ['Primary Energy|Gas', 'EJ/y', 2010, 3, np.nan],
    ]
    header = ['variable', 'unit', 'year', 'meta_df', 'clone']
    exp = pd.DataFrame(rows, columns=header)
    # the remaining index dimensions are identical for every row
    constants = (
        ('model', 'a_model'),
        ('scenario', 'a_scenario'),
        ('region', 'World'),
    )
    for column, label in constants:
        exp[column] = label
    exp = exp.set_index(IAMC_IDX + ['year'])

    pd.testing.assert_frame_equal(obs, exp)