Extend meta_df tests to include datetime #236

Merged — 18 commits, May 29, 2019
2 changes: 2 additions & 0 deletions RELEASE_NOTES.md
@@ -1,6 +1,8 @@

# Next Release

- [#236](https://github.com/IAMconsortium/pyam/pull/236) Add `swap_time_for_year` method and confirm datetime column is compatible with pyam features

# Release v0.2.0

## Highlights
4 changes: 2 additions & 2 deletions appveyor.yml
@@ -25,8 +25,8 @@ install:
- conda create -n testing python="%PYTHON_VERSION%" --yes
- conda --version
- activate testing
- conda install -y numpy pandas pyyaml xlrd xlsxwriter matplotlib==3.0.3 seaborn==0.9.0 six requests jupyter nbconvert
- conda install -y -c conda-forge libiconv gdal fiona "geopandas<0.5.0" cartopy
- conda install -y numpy pandas pyyaml xlrd xlsxwriter matplotlib==3.0.3 seaborn==0.9.0 six requests jupyter nbconvert proj4==5.2.0
- conda install -y -c conda-forge libiconv gdal fiona "geopandas<0.5.0" cartopy cython pyproj==1.9.6

build: false

1 change: 1 addition & 0 deletions ci/environment-conda-forge.txt
@@ -2,4 +2,5 @@ libiconv
gdal
fiona
"geopandas<0.5.0"
cython
cartopy
34 changes: 34 additions & 0 deletions pyam/core.py
@@ -309,6 +309,36 @@ def interpolate(self, year):
fill_values['year'] = year
self.data = self.data.append(fill_values, ignore_index=True)

def swap_time_for_year(self, inplace=False):
"""Convert the `time` column to `year`.

Parameters
----------
inplace: bool, default False
if True, do operation inplace and return None

Raises
------
ValueError
"time" is not a column of `self.data`
"""
if "time" not in self.data:
raise ValueError("time column must be datetime to use this method")

ret = self.copy() if not inplace else self

ret.data["year"] = ret.data["time"].apply(lambda x: x.year)
ret.data = ret.data.drop("time", axis="columns")
ret._LONG_IDX = [v if v != "time" else "year" for v in ret._LONG_IDX]

if any(ret.data[ret._LONG_IDX].duplicated()):
error_msg = ('swapping time for year will result in duplicate '
'rows in `data`!')
raise ValueError(error_msg)

if not inplace:
return ret

def as_pandas(self, with_metadata=False):
"""Return this as a pd.DataFrame

@@ -1358,6 +1388,10 @@ def _check_rows(rows, check, in_range=True, return_test='any'):
msg = 'Unknown checking type: {}'
raise ValueError(msg.format(check.keys() - valid_checks))

if 'year' not in rows:
rows = rows.copy()
rows['year'] = rows['time'].apply(lambda x: x.year)

where_idx = set(rows.index[rows['year'] == check['year']]) \
if 'year' in check else set(rows.index)
rows = rows.loc[list(where_idx)]
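
For reference, here is a minimal usage sketch of the `swap_time_for_year` method added above; the input frame is illustrative and not taken from this PR. The second hunk makes `_check_rows` derive a `year` column from `time` on the fly, so validation criteria keyed on `year` also work for datetime-indexed data.

```python
import datetime

import pandas as pd
from pyam import IamDataFrame

# illustrative long-format data with a datetime "time" column
df = IamDataFrame(pd.DataFrame(
    [
        ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y',
         datetime.datetime(2005, 6, 17), 1.0],
        ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y',
         datetime.datetime(2010, 7, 21), 6.0],
    ],
    columns=['model', 'scenario', 'region', 'variable', 'unit', 'time', 'value'],
))

# returns a copy in which "time" is replaced by the integer "year";
# with inplace=True the frame is modified in place and None is returned
annual = df.swap_time_for_year()
assert 'year' in annual.data.columns and 'time' not in annual.data.columns
```
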
36 changes: 24 additions & 12 deletions tests/conftest.py
@@ -167,15 +167,19 @@
TEST_STACKPLOT_DF['scenario'] = 'a_scen'


TIME_AXES = [
[2005, 2010],
[datetime(2005, 6, 17), datetime(2010, 7, 21)],
['2005-06-17', '2010-07-21'],
['2005-06-17 00:00:00', '2010-07-21 12:00:00']
]


@pytest.fixture(scope="function", params=TIME_AXES)
TEST_YEARS = [2005, 2010]
TEST_DTS = [datetime(2005, 6, 17), datetime(2010, 7, 21)]


@pytest.fixture(
scope="function",
params=[
TEST_YEARS,
TEST_DTS,
['2005-06-17', '2010-07-21'],
['2005-06-17 00:00:00', '2010-07-21 12:00:00']
]
)
def test_df(request):
tdf = TEST_DF.iloc[:2]
tdf = tdf.rename({2005: request.param[0], 2010: request.param[1]},
@@ -195,9 +199,17 @@ def test_pd_df():
yield TEST_DF.copy()


@pytest.fixture(scope="function")
def meta_df():
df = IamDataFrame(data=TEST_DF)
@pytest.fixture(
scope="function",
params=[
TEST_YEARS,
TEST_DTS,
]
)
def meta_df(request):
mdf = TEST_DF.rename({2005: request.param[0], 2010: request.param[1]},
axis="columns")
df = IamDataFrame(data=mdf)
yield df


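With this parametrization, every test that takes the `meta_df` fixture now runs twice, once with a year axis and once with a datetime axis; the downstream tests branch on which column is present, roughly like the condensed sketch below (not a test from this PR):

```python
import datetime

import pandas as pd


def test_example_pattern(meta_df):
    # meta_df is built from either TEST_YEARS or TEST_DTS (see fixture above)
    if 'year' in meta_df.data.columns:
        assert set(meta_df.data['year']) == {2005, 2010}
    else:
        exp_times = pd.to_datetime(
            [datetime.datetime(2005, 6, 17), datetime.datetime(2010, 7, 21)]
        )
        assert set(pd.to_datetime(meta_df.data['time'])) == set(exp_times)
```
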
37 changes: 23 additions & 14 deletions tests/test_cast_to_iamc.py
@@ -2,40 +2,45 @@
import pandas as pd
from pyam import IamDataFrame, compare

from conftest import TEST_DTS

def test_cast_from_value_col(meta_df):
df_with_value_cols = pd.DataFrame([
['model_a', 'scen_a', 'World', 'EJ/y', 2005, 1, 0.5],
['model_a', 'scen_a', 'World', 'EJ/y', 2010, 6., 3],
['model_a', 'scen_b', 'World', 'EJ/y', 2005, 2, None],
['model_a', 'scen_b', 'World', 'EJ/y', 2010, 7, None]
['model_a', 'scen_a', 'World', 'EJ/y', TEST_DTS[0], 1, 0.5],
['model_a', 'scen_a', 'World', 'EJ/y', TEST_DTS[1], 6., 3],
['model_a', 'scen_b', 'World', 'EJ/y', TEST_DTS[0], 2, None],
['model_a', 'scen_b', 'World', 'EJ/y', TEST_DTS[1], 7, None]
],
columns=['model', 'scenario', 'region', 'unit', 'year',
columns=['model', 'scenario', 'region', 'unit', 'time',
'Primary Energy', 'Primary Energy|Coal'],
)
df = IamDataFrame(df_with_value_cols,
value=['Primary Energy', 'Primary Energy|Coal'])
if "year" in meta_df.data.columns:
df = df.swap_time_for_year()

assert compare(meta_df, df).empty
pd.testing.assert_frame_equal(df.data, meta_df.data)
pd.testing.assert_frame_equal(df.data, meta_df.data, check_like=True)


def test_cast_from_value_col_and_args(meta_df):
# checks for issue [#210](https://github.com/IAMconsortium/pyam/issues/210)
df_with_value_cols = pd.DataFrame([
['scen_a', 'World', 'EJ/y', 2005, 1, 0.5],
['scen_a', 'World', 'EJ/y', 2010, 6., 3],
['scen_b', 'World', 'EJ/y', 2005, 2, None],
['scen_b', 'World', 'EJ/y', 2010, 7, None]
['scen_a', 'World', 'EJ/y', TEST_DTS[0], 1, 0.5],
['scen_a', 'World', 'EJ/y', TEST_DTS[1], 6., 3],
['scen_b', 'World', 'EJ/y', TEST_DTS[0], 2, None],
['scen_b', 'World', 'EJ/y', TEST_DTS[1], 7, None]
],
columns=['scenario', 'iso', 'unit', 'year',
columns=['scenario', 'iso', 'unit', 'time',
'Primary Energy', 'Primary Energy|Coal'],
)
df = IamDataFrame(df_with_value_cols, model='model_a', region='iso',
value=['Primary Energy', 'Primary Energy|Coal'])
if "year" in meta_df.data.columns:
df = df.swap_time_for_year()

assert compare(meta_df, df).empty
pd.testing.assert_frame_equal(df.data, meta_df.data)
pd.testing.assert_frame_equal(df.data, meta_df.data, check_like=True)


def test_cast_with_model_arg_raises():
@@ -58,17 +63,21 @@ def test_cast_with_model_arg(meta_df):


def test_cast_by_column_concat(meta_df):
dts = TEST_DTS
df = pd.DataFrame([
['scen_a', 'World', 'Primary Energy', None, 'EJ/y', 1, 6.],
['scen_a', 'World', 'Primary Energy', 'Coal', 'EJ/y', 0.5, 3],
['scen_b', 'World', 'Primary Energy', None, 'EJ/y', 2, 7],
],
columns=['scenario', 'region', 'var_1', 'var_2', 'unit', 2005, 2010],
columns=['scenario', 'region', 'var_1', 'var_2', 'unit'] + dts,
)

df = IamDataFrame(df, model='model_a', variable=['var_1', 'var_2'])
if "year" in meta_df.data.columns:
df = df.swap_time_for_year()

assert compare(meta_df, df).empty
pd.testing.assert_frame_equal(df.data, meta_df.data)
pd.testing.assert_frame_equal(df.data, meta_df.data, check_like=True)


def test_cast_with_variable_and_value(meta_df):
73 changes: 66 additions & 7 deletions tests/test_core.py
@@ -8,10 +8,10 @@
from numpy import testing as npt

from pyam import IamDataFrame, validate, categorize, \
require_variable, filter_by_meta, META_IDX, IAMC_IDX, sort_data
require_variable, filter_by_meta, META_IDX, IAMC_IDX, sort_data, compare
from pyam.core import _meta_idx, concat

from conftest import TEST_DATA_DIR
from conftest import TEST_DATA_DIR, TEST_DTS


df_filter_by_meta_matching_idx = pd.DataFrame([
@@ -518,22 +518,36 @@ def test_validate_up(meta_df):
obs = meta_df.validate({'Primary Energy': {'up': 6.5}},
exclude_on_fail=False)
assert len(obs) == 1
assert obs['year'].values[0] == 2010
if 'year' in meta_df.data:
assert obs['year'].values[0] == 2010
else:
exp_time = pd.to_datetime(datetime.datetime(2010, 7, 21))
assert pd.to_datetime(obs['time'].values[0]) == exp_time

assert list(meta_df['exclude']) == [False, False] # assert none excluded


def test_validate_lo(meta_df):
obs = meta_df.validate({'Primary Energy': {'up': 8, 'lo': 2.0}})
assert len(obs) == 1
assert obs['year'].values[0] == 2005
if 'year' in meta_df.data:
assert obs['year'].values[0] == 2005
else:
exp_year = pd.to_datetime(datetime.datetime(2005, 6, 17))
assert pd.to_datetime(obs['time'].values[0]) == exp_year

assert list(obs['scenario'].values) == ['scen_a']


def test_validate_both(meta_df):
obs = meta_df.validate({'Primary Energy': {'up': 6.5, 'lo': 2.0}})
assert len(obs) == 2
assert list(obs['year'].values) == [2005, 2010]
if 'year' in meta_df.data:
assert list(obs['year'].values) == [2005, 2010]
else:
exp_time = pd.to_datetime(TEST_DTS)
assert (pd.to_datetime(obs['time'].values) == exp_time).all()

assert list(obs['scenario'].values) == ['scen_a', 'scen_b']


@@ -556,7 +570,11 @@ def test_validate_top_level(meta_df):
obs = validate(meta_df, criteria={'Primary Energy': {'up': 6.0}},
exclude_on_fail=True, variable='Primary Energy')
assert len(obs) == 1
assert obs['year'].values[0] == 2010
if 'year' in meta_df.data:
assert obs['year'].values[0] == 2010
else:
exp_time = pd.to_datetime(datetime.datetime(2010, 7, 21))
assert (pd.to_datetime(obs['time'].values[0]) == exp_time)
assert list(meta_df['exclude']) == [False, True]


@@ -971,10 +989,51 @@ def test_normalize(meta_df):
exp = meta_df.data.copy().reset_index(drop=True)
exp['value'][1::2] /= exp['value'][::2].values
exp['value'][::2] /= exp['value'][::2].values
obs = meta_df.normalize(year=2005).data.reset_index(drop=True)
if "year" in meta_df.data:
obs = meta_df.normalize(year=2005).data.reset_index(drop=True)
else:
obs = meta_df.normalize(
time=datetime.datetime(2005, 6, 17)
).data.reset_index(drop=True)
pd.testing.assert_frame_equal(obs, exp)


def test_normalize_not_time(meta_df):
pytest.raises(ValueError, meta_df.normalize, variable='foo')
pytest.raises(ValueError, meta_df.normalize, year=2015, variable='foo')


@pytest.mark.parametrize("inplace", [True, False])
def test_swap_time_to_year(test_df, inplace):
if "year" in test_df.data:
return # year df not relevant for this test

exp = test_df.data.copy()
exp["year"] = exp["time"].apply(lambda x: x.year)
exp = exp.drop("time", axis="columns")
exp = IamDataFrame(exp)

obs = test_df.swap_time_for_year(inplace=inplace)

if inplace:
assert obs is None
assert compare(test_df, exp).empty
else:
assert compare(obs, exp).empty
assert "year" not in test_df.data.columns


@pytest.mark.parametrize("inplace", [True, False])
def test_swap_time_to_year_errors(test_df, inplace):
if "year" in test_df.data:
with pytest.raises(ValueError):
test_df.swap_time_for_year(inplace=inplace)
return

tdf = test_df.data.copy()
tdf["time"] = tdf["time"].apply(
lambda x: datetime.datetime(2005, x.month, x.day)
)

with pytest.raises(ValueError):
IamDataFrame(tdf).swap_time_for_year(inplace=inplace)
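
To illustrate the error case exercised in `test_swap_time_to_year_errors`: if two timestamps fall in the same year, dropping the sub-annual resolution would create duplicate rows, so `swap_time_for_year` raises rather than silently collapsing data. A sketch with illustrative data:

```python
import datetime

import pandas as pd
from pyam import IamDataFrame

# two observations of the same variable in the same year, different months
sub_annual = IamDataFrame(pd.DataFrame(
    [
        ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y',
         datetime.datetime(2005, 1, 1), 1.0],
        ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y',
         datetime.datetime(2005, 7, 1), 2.0],
    ],
    columns=['model', 'scenario', 'region', 'variable', 'unit', 'time', 'value'],
))

# collapsing both rows to year 2005 would duplicate the long-format index
try:
    sub_annual.swap_time_for_year()
except ValueError as exc:
    print(exc)  # swapping time for year will result in duplicate rows in `data`!
```
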
11 changes: 9 additions & 2 deletions tests/test_feature_aggregate.py
@@ -2,20 +2,27 @@
import pandas as pd
from pyam import check_aggregate, IAMC_IDX

from conftest import TEST_DTS

def test_do_aggregate_append(meta_df):
meta_df.rename({'variable': {'Primary Energy': 'Primary Energy|Gas'}},
inplace=True)
meta_df.aggregate('Primary Energy', append=True)
obs = meta_df.filter(variable='Primary Energy').timeseries()

dts = TEST_DTS
times = [2005, 2010] if "year" in meta_df.data else dts
exp = pd.DataFrame([
['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 1.5, 9.],
['model_a', 'scen_b', 'World', 'Primary Energy', 'EJ/y', 2, 7],
],
columns=['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010]
columns=['model', 'scenario', 'region', 'variable', 'unit'] + times
).set_index(IAMC_IDX)
exp.columns = list(map(int, exp.columns))
if "year" in meta_df.data:
exp.columns = list(map(int, exp.columns))
else:
exp.columns = pd.to_datetime(exp.columns)

pd.testing.assert_frame_equal(obs, exp)

