diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 2607e0bde..3423ff2e0 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -1,6 +1,7 @@
 
 # Next Release
 
+- [#212](https://github.com/IAMconsortium/pyam/pull/212) Now natively support reading R-style data frames with year columns like "X2015"
 - [#202](https://github.com/IAMconsortium/pyam/pull/202) Extend the `df.rename()` function with a `check_duplicates (default True)` validation option
 - [#201](https://github.com/IAMconsortium/pyam/pull/201) Added native support for legends outside of plots with `pyam.plotting.OUTSIDE_LEGEND` with a tutorial
 - [#199](https://github.com/IAMconsortium/pyam/pull/199) Initializing an `IamDataFrame` accepts kwargs to fill or create from the data any missing required columns
diff --git a/pyam/core.py b/pyam/core.py
index 2bd170bda..42a9b0e07 100644
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -57,6 +57,8 @@ class IamDataFrame(object):
         an instance of an TimeSeries or Scenario (requires `ixmp`),
         or pd.DataFrame or data file with IAMC-format data columns.
         A pd.DataFrame can have the required data as columns or index.
+        Support is provided additionally for R-style data columns for years,
+        like "X2015", etc.
     kwargs:
         if `value=col`, melt `col` to `value` and use `col` name as `variable`;
         else, mapping of columns required for an `IamDataFrame` to:
@@ -64,6 +66,7 @@ class IamDataFrame(object):
         - multiple columns, which will be concatenated by pipe
         - a string to be used as value for this column
     """
+
     def __init__(self, data, **kwargs):
         """Initialize an instance of an IamDataFrame"""
         # import data from pd.DataFrame or read from source
diff --git a/pyam/utils.py b/pyam/utils.py
index 5695495f4..8135fb281 100644
--- a/pyam/utils.py
+++ b/pyam/utils.py
@@ -132,6 +132,29 @@ def format_data(df, **kwargs):
     if isinstance(df, pd.Series):
         df = df.to_frame()
 
+    # Check for R-style year columns, converting where necessary
+    def convert_r_columns(c):
+        """Return the integer year for an R-style label such as 'X2015'.
+
+        Any label that is not an 'X' followed by text that `int()` can
+        parse is returned unchanged.
+        """
+        try:
+            first, rest = c[0], c[1:]
+        except (TypeError, IndexError, KeyError):
+            # not a non-empty string/sequence (e.g. an integer year)
+            return c
+        if first == 'X':
+            try:
+                # bingo! was X2015 R-style, return the integer
+                return int(rest)
+            except (TypeError, ValueError):
+                # nope, not an int, fall down to final return statement
+                pass
+        return c
+
+    df.columns = df.columns.map(convert_r_columns)
+
     # if `value` is given but not `variable`,
     # melt value columns and use column name as `variable`
     if 'value' in kwargs and 'variable' not in kwargs:
diff --git a/tests/test_cast_to_iamc.py b/tests/test_cast_to_iamc.py
index c515cb43d..c190b9981 100644
--- a/tests/test_cast_to_iamc.py
+++ b/tests/test_cast_to_iamc.py
@@ -79,3 +79,21 @@ def test_cast_with_variable_and_value(meta_df):
 
     assert compare(pe_df, df).empty
     pd.testing.assert_frame_equal(df.data, pe_df.data.reset_index(drop=True))
+
+
+def test_cast_from_r_df(test_pd_df):
+    df = test_pd_df.copy()
+    # last two columns are years
+    df.columns = list(df.columns[:-2]) + ['X{}'.format(c)
+                                          for c in df.columns[-2:]]
+    obs = IamDataFrame(df)
+    exp = IamDataFrame(test_pd_df)
+    assert compare(obs, exp).empty
+    pd.testing.assert_frame_equal(obs.data, exp.data)
+
+
+def test_cast_from_r_df_err(test_pd_df):
+    df = test_pd_df.copy()
+    # last two columns are years
+    df.columns = list(df.columns[:-2]) + ['Xfoo', 'Xbar']
+    pytest.raises(ValueError, IamDataFrame, df)