Skip to content

Commit

Permalink
extend meta_df tests to include datetime (#236)
Browse files Browse the repository at this point in the history
  • Loading branch information
znicholls authored and danielhuppmann committed May 29, 2019
1 parent 25aa7f0 commit 1d60716
Show file tree
Hide file tree
Showing 10 changed files with 194 additions and 49 deletions.
2 changes: 2 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@

# Next Release

- [#236](https://github.com/IAMconsortium/pyam/pull/236) Add `swap_time_for_year` method and confirm datetime column is compatible with pyam features

# Release v0.2.0

## Highlights
Expand Down
4 changes: 2 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ install:
- conda create -n testing python="%PYTHON_VERSION%" --yes
- conda --version
- activate testing
- conda install -y numpy pandas pyyaml xlrd xlsxwriter matplotlib==3.0.3 seaborn==0.9.0 six requests jupyter nbconvert
- conda install -y -c conda-forge libiconv gdal fiona "geopandas<0.5.0" cartopy
- conda install -y numpy pandas pyyaml xlrd xlsxwriter matplotlib==3.0.3 seaborn==0.9.0 six requests jupyter nbconvert proj4==5.2.0
- conda install -y -c conda-forge libiconv gdal fiona "geopandas<0.5.0" cartopy cython pyproj==1.9.6

build: false

Expand Down
1 change: 1 addition & 0 deletions ci/environment-conda-forge.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ libiconv
gdal
fiona
"geopandas<0.5.0"
cython
cartopy
34 changes: 34 additions & 0 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,36 @@ def interpolate(self, year):
fill_values['year'] = year
self.data = self.data.append(fill_values, ignore_index=True)

def swap_time_for_year(self, inplace=False):
    """Convert the `time` column to `year`.

    The year of every entry in the `time` column is extracted into a new
    `year` column, the `time` column is dropped, and `time` is replaced by
    `year` in `_LONG_IDX`.

    Parameters
    ----------
    inplace: bool, default False
        if True, do operation inplace and return None

    Returns
    -------
    A converted copy of this object if `inplace` is False, otherwise None.

    Raises
    ------
    ValueError
        "time" is not a column of `self.data`, or dropping the sub-annual
        information would result in duplicate rows in `data`. In both
        cases this object is left unmodified, even if `inplace` is True.
    """
    if "time" not in self.data:
        raise ValueError("time column must be datetime to use this method")

    # Build the converted index and data on local objects first, so that a
    # failed duplicate check cannot leave `self` half-converted when
    # operating inplace.
    long_idx = [v if v != "time" else "year" for v in self._LONG_IDX]
    years = self.data["time"].apply(lambda x: x.year)
    # `drop` and `assign` both return new frames; `year` ends up as the last
    # column, exactly as when it is appended before dropping `time`.
    data = self.data.drop("time", axis="columns").assign(year=years)

    if data[long_idx].duplicated().any():
        error_msg = ('swapping time for year will result in duplicate '
                     'rows in `data`!')
        raise ValueError(error_msg)

    # Validation passed: commit the result to the target object.
    ret = self if inplace else self.copy()
    ret.data = data
    ret._LONG_IDX = long_idx

    if not inplace:
        return ret

def as_pandas(self, with_metadata=False):
"""Return this as a pd.DataFrame
Expand Down Expand Up @@ -1364,6 +1394,10 @@ def _check_rows(rows, check, in_range=True, return_test='any'):
msg = 'Unknown checking type: {}'
raise ValueError(msg.format(check.keys() - valid_checks))

if 'year' not in rows:
rows = rows.copy()
rows['year'] = rows['time'].apply(lambda x: x.year)

where_idx = set(rows.index[rows['year'] == check['year']]) \
if 'year' in check else set(rows.index)
rows = rows.loc[list(where_idx)]
Expand Down
36 changes: 24 additions & 12 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,19 @@
TEST_STACKPLOT_DF['scenario'] = 'a_scen'


TIME_AXES = [
[2005, 2010],
[datetime(2005, 6, 17), datetime(2010, 7, 21)],
['2005-06-17', '2010-07-21'],
['2005-06-17 00:00:00', '2010-07-21 12:00:00']
]


@pytest.fixture(scope="function", params=TIME_AXES)
TEST_YEARS = [2005, 2010]
TEST_DTS = [datetime(2005, 6, 17), datetime(2010, 7, 21)]


@pytest.fixture(
scope="function",
params=[
TEST_YEARS,
TEST_DTS,
['2005-06-17', '2010-07-21'],
['2005-06-17 00:00:00', '2010-07-21 12:00:00']
]
)
def test_df(request):
tdf = TEST_DF.iloc[:2]
tdf = tdf.rename({2005: request.param[0], 2010: request.param[1]},
Expand All @@ -195,9 +199,17 @@ def test_pd_df():
yield TEST_DF.copy()


@pytest.fixture(scope="function")
def meta_df():
df = IamDataFrame(data=TEST_DF)
@pytest.fixture(
scope="function",
params=[
TEST_YEARS,
TEST_DTS,
]
)
def meta_df(request):
mdf = TEST_DF.rename({2005: request.param[0], 2010: request.param[1]},
axis="columns")
df = IamDataFrame(data=mdf)
yield df


Expand Down
37 changes: 23 additions & 14 deletions tests/test_cast_to_iamc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,45 @@
import pandas as pd
from pyam import IamDataFrame, compare

from conftest import TEST_DTS

def test_cast_from_value_col(meta_df):
df_with_value_cols = pd.DataFrame([
['model_a', 'scen_a', 'World', 'EJ/y', 2005, 1, 0.5],
['model_a', 'scen_a', 'World', 'EJ/y', 2010, 6., 3],
['model_a', 'scen_b', 'World', 'EJ/y', 2005, 2, None],
['model_a', 'scen_b', 'World', 'EJ/y', 2010, 7, None]
['model_a', 'scen_a', 'World', 'EJ/y', TEST_DTS[0], 1, 0.5],
['model_a', 'scen_a', 'World', 'EJ/y', TEST_DTS[1], 6., 3],
['model_a', 'scen_b', 'World', 'EJ/y', TEST_DTS[0], 2, None],
['model_a', 'scen_b', 'World', 'EJ/y', TEST_DTS[1], 7, None]
],
columns=['model', 'scenario', 'region', 'unit', 'year',
columns=['model', 'scenario', 'region', 'unit', 'time',
'Primary Energy', 'Primary Energy|Coal'],
)
df = IamDataFrame(df_with_value_cols,
value=['Primary Energy', 'Primary Energy|Coal'])
if "year" in meta_df.data.columns:
df = df.swap_time_for_year()

assert compare(meta_df, df).empty
pd.testing.assert_frame_equal(df.data, meta_df.data)
pd.testing.assert_frame_equal(df.data, meta_df.data, check_like=True)


def test_cast_from_value_col_and_args(meta_df):
# checks for issue [#210](https://github.com/IAMconsortium/pyam/issues/210)
df_with_value_cols = pd.DataFrame([
['scen_a', 'World', 'EJ/y', 2005, 1, 0.5],
['scen_a', 'World', 'EJ/y', 2010, 6., 3],
['scen_b', 'World', 'EJ/y', 2005, 2, None],
['scen_b', 'World', 'EJ/y', 2010, 7, None]
['scen_a', 'World', 'EJ/y', TEST_DTS[0], 1, 0.5],
['scen_a', 'World', 'EJ/y', TEST_DTS[1], 6., 3],
['scen_b', 'World', 'EJ/y', TEST_DTS[0], 2, None],
['scen_b', 'World', 'EJ/y', TEST_DTS[1], 7, None]
],
columns=['scenario', 'iso', 'unit', 'year',
columns=['scenario', 'iso', 'unit', 'time',
'Primary Energy', 'Primary Energy|Coal'],
)
df = IamDataFrame(df_with_value_cols, model='model_a', region='iso',
value=['Primary Energy', 'Primary Energy|Coal'])
if "year" in meta_df.data.columns:
df = df.swap_time_for_year()

assert compare(meta_df, df).empty
pd.testing.assert_frame_equal(df.data, meta_df.data)
pd.testing.assert_frame_equal(df.data, meta_df.data, check_like=True)


def test_cast_with_model_arg_raises():
Expand All @@ -58,17 +63,21 @@ def test_cast_with_model_arg(meta_df):


def test_cast_by_column_concat(meta_df):
dts = TEST_DTS
df = pd.DataFrame([
['scen_a', 'World', 'Primary Energy', None, 'EJ/y', 1, 6.],
['scen_a', 'World', 'Primary Energy', 'Coal', 'EJ/y', 0.5, 3],
['scen_b', 'World', 'Primary Energy', None, 'EJ/y', 2, 7],
],
columns=['scenario', 'region', 'var_1', 'var_2', 'unit', 2005, 2010],
columns=['scenario', 'region', 'var_1', 'var_2', 'unit'] + dts,
)

df = IamDataFrame(df, model='model_a', variable=['var_1', 'var_2'])
if "year" in meta_df.data.columns:
df = df.swap_time_for_year()

assert compare(meta_df, df).empty
pd.testing.assert_frame_equal(df.data, meta_df.data)
pd.testing.assert_frame_equal(df.data, meta_df.data, check_like=True)


def test_cast_with_variable_and_value(meta_df):
Expand Down
73 changes: 66 additions & 7 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
from numpy import testing as npt

from pyam import IamDataFrame, validate, categorize, \
require_variable, filter_by_meta, META_IDX, IAMC_IDX, sort_data
require_variable, filter_by_meta, META_IDX, IAMC_IDX, sort_data, compare
from pyam.core import _meta_idx, concat

from conftest import TEST_DATA_DIR
from conftest import TEST_DATA_DIR, TEST_DTS


df_filter_by_meta_matching_idx = pd.DataFrame([
Expand Down Expand Up @@ -518,22 +518,36 @@ def test_validate_up(meta_df):
obs = meta_df.validate({'Primary Energy': {'up': 6.5}},
exclude_on_fail=False)
assert len(obs) == 1
assert obs['year'].values[0] == 2010
if 'year' in meta_df.data:
assert obs['year'].values[0] == 2010
else:
exp_time = pd.to_datetime(datetime.datetime(2010, 7, 21))
assert pd.to_datetime(obs['time'].values[0]) == exp_time

assert list(meta_df['exclude']) == [False, False] # assert none excluded


def test_validate_lo(meta_df):
obs = meta_df.validate({'Primary Energy': {'up': 8, 'lo': 2.0}})
assert len(obs) == 1
assert obs['year'].values[0] == 2005
if 'year' in meta_df.data:
assert obs['year'].values[0] == 2005
else:
exp_year = pd.to_datetime(datetime.datetime(2005, 6, 17))
assert pd.to_datetime(obs['time'].values[0]) == exp_year

assert list(obs['scenario'].values) == ['scen_a']


def test_validate_both(meta_df):
obs = meta_df.validate({'Primary Energy': {'up': 6.5, 'lo': 2.0}})
assert len(obs) == 2
assert list(obs['year'].values) == [2005, 2010]
if 'year' in meta_df.data:
assert list(obs['year'].values) == [2005, 2010]
else:
exp_time = pd.to_datetime(TEST_DTS)
assert (pd.to_datetime(obs['time'].values) == exp_time).all()

assert list(obs['scenario'].values) == ['scen_a', 'scen_b']


Expand All @@ -556,7 +570,11 @@ def test_validate_top_level(meta_df):
obs = validate(meta_df, criteria={'Primary Energy': {'up': 6.0}},
exclude_on_fail=True, variable='Primary Energy')
assert len(obs) == 1
assert obs['year'].values[0] == 2010
if 'year' in meta_df.data:
assert obs['year'].values[0] == 2010
else:
exp_time = pd.to_datetime(datetime.datetime(2010, 7, 21))
assert (pd.to_datetime(obs['time'].values[0]) == exp_time)
assert list(meta_df['exclude']) == [False, True]


Expand Down Expand Up @@ -971,10 +989,51 @@ def test_normalize(meta_df):
exp = meta_df.data.copy().reset_index(drop=True)
exp['value'][1::2] /= exp['value'][::2].values
exp['value'][::2] /= exp['value'][::2].values
obs = meta_df.normalize(year=2005).data.reset_index(drop=True)
if "year" in meta_df.data:
obs = meta_df.normalize(year=2005).data.reset_index(drop=True)
else:
obs = meta_df.normalize(
time=datetime.datetime(2005, 6, 17)
).data.reset_index(drop=True)
pd.testing.assert_frame_equal(obs, exp)


def test_normalize_not_time(meta_df):
    """`normalize` must raise ValueError when given a non-time keyword."""
    # only a non-time keyword is passed
    with pytest.raises(ValueError):
        meta_df.normalize(variable='foo')

    # a time keyword combined with an additional non-time keyword
    with pytest.raises(ValueError):
        meta_df.normalize(year=2015, variable='foo')


@pytest.mark.parametrize("inplace", [True, False])
def test_swap_time_to_year(test_df, inplace):
    """Check `swap_time_for_year` against a manually converted frame."""
    if "year" in test_df.data:
        return  # year df not relevant for this test

    # build the expected result by hand: derive `year`, then drop `time`
    expected_data = test_df.data.copy()
    expected_data["year"] = expected_data["time"].apply(lambda x: x.year)
    expected_data = expected_data.drop("time", axis="columns")
    expected = IamDataFrame(expected_data)

    result = test_df.swap_time_for_year(inplace=inplace)

    if not inplace:
        # a converted copy is returned and the original keeps its `time` axis
        assert compare(result, expected).empty
        assert "year" not in test_df.data.columns
        return

    # inplace: nothing is returned and the fixture itself is converted
    assert result is None
    assert compare(test_df, expected).empty


@pytest.mark.parametrize("inplace", [True, False])
def test_swap_time_to_year_errors(test_df, inplace):
    """`swap_time_for_year` must raise ValueError on unsuitable data."""
    has_year_axis = "year" in test_df.data
    if has_year_axis:
        # there is no `time` column to swap
        with pytest.raises(ValueError):
            test_df.swap_time_for_year(inplace=inplace)
        return

    def _move_to_2005(stamp):
        # keep month and day, but force every timestamp into the same year
        return datetime.datetime(2005, stamp.month, stamp.day)

    same_year = test_df.data.copy()
    same_year["time"] = same_year["time"].apply(_move_to_2005)

    # all rows now share the year 2005, so the swap would create duplicates
    with pytest.raises(ValueError):
        IamDataFrame(same_year).swap_time_for_year(inplace=inplace)
11 changes: 9 additions & 2 deletions tests/test_feature_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,27 @@
import pandas as pd
from pyam import check_aggregate, IAMC_IDX

from conftest import TEST_DTS

def test_do_aggregate_append(meta_df):
meta_df.rename({'variable': {'Primary Energy': 'Primary Energy|Gas'}},
inplace=True)
meta_df.aggregate('Primary Energy', append=True)
obs = meta_df.filter(variable='Primary Energy').timeseries()

dts = TEST_DTS
times = [2005, 2010] if "year" in meta_df.data else dts
exp = pd.DataFrame([
['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 1.5, 9.],
['model_a', 'scen_b', 'World', 'Primary Energy', 'EJ/y', 2, 7],
],
columns=['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010]
columns=['model', 'scenario', 'region', 'variable', 'unit'] + times
).set_index(IAMC_IDX)
exp.columns = list(map(int, exp.columns))
if "year" in meta_df.data:
exp.columns = list(map(int, exp.columns))
else:
exp.columns = pd.to_datetime(exp.columns)

pd.testing.assert_frame_equal(obs, exp)


Expand Down
Loading

0 comments on commit 1d60716

Please sign in to comment.