Extend meta_df tests to include datetime #236

Merged — 18 commits, May 29, 2019
2 changes: 2 additions & 0 deletions RELEASE_NOTES.md
@@ -1,6 +1,8 @@

# Next Release

- [#236](https://github.com/IAMconsortium/pyam/pull/236) Add `swap_time_for_year` method and confirm datetime column is compatible with pyam features

# Release v0.2.0

## Highlights
4 changes: 2 additions & 2 deletions appveyor.yml
@@ -25,8 +25,8 @@ install:
- conda create -n testing python="%PYTHON_VERSION%" --yes
- conda --version
- activate testing
- conda install -y numpy pandas pyyaml xlrd xlsxwriter matplotlib==3.0.3 seaborn==0.9.0 six requests jupyter nbconvert
- conda install -y -c conda-forge libiconv gdal fiona "geopandas<0.5.0" cartopy
- conda install -y numpy pandas pyyaml xlrd xlsxwriter matplotlib==3.0.3 seaborn==0.9.0 six requests jupyter nbconvert proj4==5.2.0
- conda install -y -c conda-forge libiconv gdal fiona "geopandas<0.5.0" cartopy cython pyproj==1.9.6

build: false

1 change: 1 addition & 0 deletions ci/environment-conda-forge.txt
@@ -2,4 +2,5 @@ libiconv
gdal
fiona
"geopandas<0.5.0"
cython
cartopy
34 changes: 34 additions & 0 deletions pyam/core.py
@@ -309,6 +309,36 @@ def interpolate(self, year):
fill_values['year'] = year
self.data = self.data.append(fill_values, ignore_index=True)

def swap_time_for_year(self, inplace=False):
"""Convert the `time` column to `year`.

Parameters
----------
inplace: bool, default False
if True, do operation inplace and return None

Raises
------
ValueError
"time" is not a column of `self.data`
"""
if "time" not in self.data:
raise ValueError("time column must be datetime to use this method")

ret = self.copy() if not inplace else self

ret.data["year"] = ret.data["time"].apply(lambda x: x.year)
ret.data = ret.data.drop("time", axis="columns")
ret._LONG_IDX = [v if v != "time" else "year" for v in ret._LONG_IDX]

if any(ret.data[ret._LONG_IDX].duplicated()):
error_msg = ('swapping time for year will result in duplicate '
'rows in `data`!')
raise ValueError(error_msg)

if not inplace:
return ret

def as_pandas(self, with_metadata=False):
"""Return this as a pd.DataFrame

@@ -1358,6 +1388,10 @@ def _check_rows(rows, check, in_range=True, return_test='any'):
msg = 'Unknown checking type: {}'
raise ValueError(msg.format(check.keys() - valid_checks))

if 'year' not in rows:
rows = rows.copy()
rows['year'] = rows['time'].apply(lambda x: x.year)

where_idx = set(rows.index[rows['year'] == check['year']]) \
if 'year' in check else set(rows.index)
rows = rows.loc[list(where_idx)]
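
For reference, here is a minimal usage sketch of the `swap_time_for_year` method added above; the input frame is illustrative and not taken from this PR. The second hunk makes `_check_rows` derive a `year` column from `time` on the fly, so validation criteria keyed on `year` also work for datetime-indexed data.

```python
import datetime

import pandas as pd
from pyam import IamDataFrame

# illustrative long-format data with a datetime "time" column
df = IamDataFrame(pd.DataFrame(
    [
        ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y',
         datetime.datetime(2005, 6, 17), 1.0],
        ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y',
         datetime.datetime(2010, 7, 21), 6.0],
    ],
    columns=['model', 'scenario', 'region', 'variable', 'unit', 'time', 'value'],
))

# returns a copy in which "time" is replaced by the integer "year";
# with inplace=True the frame is modified in place and None is returned
annual = df.swap_time_for_year()
assert 'year' in annual.data.columns and 'time' not in annual.data.columns
```
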
36 changes: 24 additions & 12 deletions tests/conftest.py
@@ -167,15 +167,19 @@
TEST_STACKPLOT_DF['scenario'] = 'a_scen'


TIME_AXES = [
[2005, 2010],
[datetime(2005, 6, 17), datetime(2010, 7, 21)],
['2005-06-17', '2010-07-21'],
['2005-06-17 00:00:00', '2010-07-21 12:00:00']
]


@pytest.fixture(scope="function", params=TIME_AXES)
TEST_YEARS = [2005, 2010]
TEST_DTS = [datetime(2005, 6, 17), datetime(2010, 7, 21)]


@pytest.fixture(
scope="function",
params=[
TEST_YEARS,
TEST_DTS,
['2005-06-17', '2010-07-21'],
['2005-06-17 00:00:00', '2010-07-21 12:00:00']
]
)
def test_df(request):
tdf = TEST_DF.iloc[:2]
tdf = tdf.rename({2005: request.param[0], 2010: request.param[1]},
@@ -195,9 +199,17 @@ def test_pd_df():
yield TEST_DF.copy()


@pytest.fixture(scope="function")
def meta_df():
df = IamDataFrame(data=TEST_DF)
@pytest.fixture(
scope="function",
params=[
TEST_YEARS,
TEST_DTS,
]
)
def meta_df(request):
mdf = TEST_DF.rename({2005: request.param[0], 2010: request.param[1]},
axis="columns")
df = IamDataFrame(data=mdf)
yield df


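With this parametrization, every test that takes the `meta_df` fixture now runs twice, once with a year axis and once with a datetime axis; the downstream tests branch on which column is present, roughly like the condensed sketch below (not a test from this PR):

```python
import datetime

import pandas as pd


def test_example_pattern(meta_df):
    # meta_df is built from either TEST_YEARS or TEST_DTS (see fixture above)
    if 'year' in meta_df.data.columns:
        assert set(meta_df.data['year']) == {2005, 2010}
    else:
        exp_times = pd.to_datetime(
            [datetime.datetime(2005, 6, 17), datetime.datetime(2010, 7, 21)]
        )
        assert set(pd.to_datetime(meta_df.data['time'])) == set(exp_times)
```
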
37 changes: 23 additions & 14 deletions tests/test_cast_to_iamc.py
@@ -2,40 +2,45 @@
import pandas as pd
from pyam import IamDataFrame, compare

from conftest import TEST_DTS

def test_cast_from_value_col(meta_df):
df_with_value_cols = pd.DataFrame([
['model_a', 'scen_a', 'World', 'EJ/y', 2005, 1, 0.5],
['model_a', 'scen_a', 'World', 'EJ/y', 2010, 6., 3],
['model_a', 'scen_b', 'World', 'EJ/y', 2005, 2, None],
['model_a', 'scen_b', 'World', 'EJ/y', 2010, 7, None]
['model_a', 'scen_a', 'World', 'EJ/y', TEST_DTS[0], 1, 0.5],
['model_a', 'scen_a', 'World', 'EJ/y', TEST_DTS[1], 6., 3],
['model_a', 'scen_b', 'World', 'EJ/y', TEST_DTS[0], 2, None],
['model_a', 'scen_b', 'World', 'EJ/y', TEST_DTS[1], 7, None]
],
columns=['model', 'scenario', 'region', 'unit', 'year',
columns=['model', 'scenario', 'region', 'unit', 'time',
'Primary Energy', 'Primary Energy|Coal'],
)
df = IamDataFrame(df_with_value_cols,
value=['Primary Energy', 'Primary Energy|Coal'])
if "year" in meta_df.data.columns:
df = df.swap_time_for_year()

assert compare(meta_df, df).empty
pd.testing.assert_frame_equal(df.data, meta_df.data)
pd.testing.assert_frame_equal(df.data, meta_df.data, check_like=True)


def test_cast_from_value_col_and_args(meta_df):
# checks for issue [#210](https://github.com/IAMconsortium/pyam/issues/210)
df_with_value_cols = pd.DataFrame([
['scen_a', 'World', 'EJ/y', 2005, 1, 0.5],
['scen_a', 'World', 'EJ/y', 2010, 6., 3],
['scen_b', 'World', 'EJ/y', 2005, 2, None],
['scen_b', 'World', 'EJ/y', 2010, 7, None]
['scen_a', 'World', 'EJ/y', TEST_DTS[0], 1, 0.5],
['scen_a', 'World', 'EJ/y', TEST_DTS[1], 6., 3],
['scen_b', 'World', 'EJ/y', TEST_DTS[0], 2, None],
['scen_b', 'World', 'EJ/y', TEST_DTS[1], 7, None]
],
columns=['scenario', 'iso', 'unit', 'year',
columns=['scenario', 'iso', 'unit', 'time',
'Primary Energy', 'Primary Energy|Coal'],
)
df = IamDataFrame(df_with_value_cols, model='model_a', region='iso',
value=['Primary Energy', 'Primary Energy|Coal'])
if "year" in meta_df.data.columns:
df = df.swap_time_for_year()

assert compare(meta_df, df).empty
pd.testing.assert_frame_equal(df.data, meta_df.data)
pd.testing.assert_frame_equal(df.data, meta_df.data, check_like=True)


def test_cast_with_model_arg_raises():
@@ -58,17 +63,21 @@ def test_cast_with_model_arg(meta_df):


def test_cast_by_column_concat(meta_df):
dts = TEST_DTS
df = pd.DataFrame([
['scen_a', 'World', 'Primary Energy', None, 'EJ/y', 1, 6.],
['scen_a', 'World', 'Primary Energy', 'Coal', 'EJ/y', 0.5, 3],
['scen_b', 'World', 'Primary Energy', None, 'EJ/y', 2, 7],
],
columns=['scenario', 'region', 'var_1', 'var_2', 'unit', 2005, 2010],
columns=['scenario', 'region', 'var_1', 'var_2', 'unit'] + dts,
)

df = IamDataFrame(df, model='model_a', variable=['var_1', 'var_2'])
if "year" in meta_df.data.columns:
df = df.swap_time_for_year()

assert compare(meta_df, df).empty
pd.testing.assert_frame_equal(df.data, meta_df.data)
pd.testing.assert_frame_equal(df.data, meta_df.data, check_like=True)


def test_cast_with_variable_and_value(meta_df):
73 changes: 66 additions & 7 deletions tests/test_core.py
@@ -8,10 +8,10 @@
from numpy import testing as npt

from pyam import IamDataFrame, validate, categorize, \
require_variable, filter_by_meta, META_IDX, IAMC_IDX, sort_data
require_variable, filter_by_meta, META_IDX, IAMC_IDX, sort_data, compare
from pyam.core import _meta_idx, concat

from conftest import TEST_DATA_DIR
from conftest import TEST_DATA_DIR, TEST_DTS


df_filter_by_meta_matching_idx = pd.DataFrame([
@@ -518,22 +518,36 @@ def test_validate_up(meta_df):
obs = meta_df.validate({'Primary Energy': {'up': 6.5}},
exclude_on_fail=False)
assert len(obs) == 1
assert obs['year'].values[0] == 2010
if 'year' in meta_df.data:
assert obs['year'].values[0] == 2010
else:
exp_time = pd.to_datetime(datetime.datetime(2010, 7, 21))
assert pd.to_datetime(obs['time'].values[0]) == exp_time

assert list(meta_df['exclude']) == [False, False] # assert none excluded


def test_validate_lo(meta_df):
obs = meta_df.validate({'Primary Energy': {'up': 8, 'lo': 2.0}})
assert len(obs) == 1
assert obs['year'].values[0] == 2005
if 'year' in meta_df.data:
assert obs['year'].values[0] == 2005
else:
exp_year = pd.to_datetime(datetime.datetime(2005, 6, 17))
assert pd.to_datetime(obs['time'].values[0]) == exp_year

assert list(obs['scenario'].values) == ['scen_a']


def test_validate_both(meta_df):
obs = meta_df.validate({'Primary Energy': {'up': 6.5, 'lo': 2.0}})
assert len(obs) == 2
assert list(obs['year'].values) == [2005, 2010]
if 'year' in meta_df.data:
assert list(obs['year'].values) == [2005, 2010]
else:
exp_time = pd.to_datetime(TEST_DTS)
assert (pd.to_datetime(obs['time'].values) == exp_time).all()

assert list(obs['scenario'].values) == ['scen_a', 'scen_b']


@@ -556,7 +570,11 @@ def test_validate_top_level(meta_df):
obs = validate(meta_df, criteria={'Primary Energy': {'up': 6.0}},
exclude_on_fail=True, variable='Primary Energy')
assert len(obs) == 1
assert obs['year'].values[0] == 2010
if 'year' in meta_df.data:
assert obs['year'].values[0] == 2010
else:
exp_time = pd.to_datetime(datetime.datetime(2010, 7, 21))
assert (pd.to_datetime(obs['time'].values[0]) == exp_time)
assert list(meta_df['exclude']) == [False, True]


@@ -971,10 +989,51 @@ def test_normalize(meta_df):
exp = meta_df.data.copy().reset_index(drop=True)
exp['value'][1::2] /= exp['value'][::2].values
exp['value'][::2] /= exp['value'][::2].values
obs = meta_df.normalize(year=2005).data.reset_index(drop=True)
if "year" in meta_df.data:
obs = meta_df.normalize(year=2005).data.reset_index(drop=True)
else:
obs = meta_df.normalize(
time=datetime.datetime(2005, 6, 17)
).data.reset_index(drop=True)
pd.testing.assert_frame_equal(obs, exp)


def test_normalize_not_time(meta_df):
pytest.raises(ValueError, meta_df.normalize, variable='foo')
pytest.raises(ValueError, meta_df.normalize, year=2015, variable='foo')


@pytest.mark.parametrize("inplace", [True, False])
def test_swap_time_to_year(test_df, inplace):
if "year" in test_df.data:
return # year df not relevant for this test

exp = test_df.data.copy()
exp["year"] = exp["time"].apply(lambda x: x.year)
exp = exp.drop("time", axis="columns")
exp = IamDataFrame(exp)

obs = test_df.swap_time_for_year(inplace=inplace)

if inplace:
assert obs is None
assert compare(test_df, exp).empty
else:
assert compare(obs, exp).empty
assert "year" not in test_df.data.columns


@pytest.mark.parametrize("inplace", [True, False])
def test_swap_time_to_year_errors(test_df, inplace):
if "year" in test_df.data:
with pytest.raises(ValueError):
test_df.swap_time_for_year(inplace=inplace)
return

tdf = test_df.data.copy()
tdf["time"] = tdf["time"].apply(
lambda x: datetime.datetime(2005, x.month, x.day)
)

with pytest.raises(ValueError):
IamDataFrame(tdf).swap_time_for_year(inplace=inplace)
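
To illustrate the error case exercised in `test_swap_time_to_year_errors`: if two timestamps fall in the same year, dropping the sub-annual resolution would create duplicate rows, so `swap_time_for_year` raises rather than silently collapsing data. A sketch with illustrative data:

```python
import datetime

import pandas as pd
from pyam import IamDataFrame

# two observations of the same variable in the same year, different months
sub_annual = IamDataFrame(pd.DataFrame(
    [
        ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y',
         datetime.datetime(2005, 1, 1), 1.0],
        ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y',
         datetime.datetime(2005, 7, 1), 2.0],
    ],
    columns=['model', 'scenario', 'region', 'variable', 'unit', 'time', 'value'],
))

# collapsing both rows to year 2005 would duplicate the long-format index
try:
    sub_annual.swap_time_for_year()
except ValueError as exc:
    print(exc)  # swapping time for year will result in duplicate rows in `data`!
```
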
11 changes: 9 additions & 2 deletions tests/test_feature_aggregate.py
@@ -2,20 +2,27 @@
import pandas as pd
from pyam import check_aggregate, IAMC_IDX

from conftest import TEST_DTS

def test_do_aggregate_append(meta_df):
meta_df.rename({'variable': {'Primary Energy': 'Primary Energy|Gas'}},
inplace=True)
meta_df.aggregate('Primary Energy', append=True)
obs = meta_df.filter(variable='Primary Energy').timeseries()

dts = TEST_DTS
times = [2005, 2010] if "year" in meta_df.data else dts
exp = pd.DataFrame([
['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 1.5, 9.],
['model_a', 'scen_b', 'World', 'Primary Energy', 'EJ/y', 2, 7],
],
columns=['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010]
columns=['model', 'scenario', 'region', 'variable', 'unit'] + times
).set_index(IAMC_IDX)
exp.columns = list(map(int, exp.columns))
if "year" in meta_df.data:
exp.columns = list(map(int, exp.columns))
else:
exp.columns = pd.to_datetime(exp.columns)

pd.testing.assert_frame_equal(obs, exp)

