Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor to allow subclasses to set their own time format #177

Merged
merged 12 commits into from
Jan 25, 2019
18 changes: 18 additions & 0 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
read_files,
read_pandas,
format_data,
cast_years_to_int,
pattern_match,
years_match,
month_match,
Expand Down Expand Up @@ -72,6 +73,8 @@ def __init__(self, data, **kwargs):
_data = read_ix(data, **kwargs)
else:
_data = read_files(data, **kwargs)

_data = self._format_data_time_col(_data)
self.data, self.time_col, self.extra_cols = _data
self._LONG_IDX = IAMC_IDX + [self.time_col] + self.extra_cols

Expand All @@ -83,6 +86,21 @@ def __init__(self, data, **kwargs):
if 'exec' in run_control():
self._execute_run_control()

def _format_data_time_col(self, data):
    """Cast the time column of freshly read data to its working format.

    Parameters
    ----------
    data : tuple
        ``(df, time_col, extra_cols)`` where ``df`` is the long-format
        dataframe and ``time_col`` names its time column.

    Returns
    -------
    tuple
        ``(df, time_col, extra_cols)``; ``time_col`` and ``extra_cols``
        are passed through untouched. The conversion assigns into ``df``,
        so the frame handed in is modified.
    """
    df, time_col, extra_cols = data

    if time_col == 'year':
        # only cast when the years are not already stored as int64
        if df['year'].dtype != 'int64':
            df['year'] = cast_years_to_int(pd.to_numeric(df['year']))
    elif time_col == 'time':
        # kept in its own method so that subclasses can substitute their
        # own datetime handling
        df = self._format_datetime_col(df)

    return (df, time_col, extra_cols)

def _format_datetime_col(self, df):
    """Convert the ``'time'`` column of ``df`` using ``pd.to_datetime``.

    This is the hook a subclass overrides to keep or produce a different
    time representation.

    The column is replaced on ``df`` itself and the same frame is returned.
    """
    df['time'] = pd.to_datetime(df['time'])
    return df

def __getitem__(self, key):
_key_check = [key] if isstr(key) else key
if set(_key_check).issubset(self.meta.columns):
Expand Down
13 changes: 5 additions & 8 deletions pyam/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import glob
import collections
import datetime
from dateutil import parser
import time

import numpy as np
Expand Down Expand Up @@ -172,8 +173,11 @@ def format_data(df):
try:
year_cols.append(i) if int(i) else None
except (ValueError, TypeError):
if isinstance(i, datetime.datetime):
time_cols.append(i)
continue
try:
pd.to_datetime([i])
parser.parse(i)
time_cols.append(i)
except ValueError:
extra_cols.append(i)
Expand All @@ -189,13 +193,6 @@ def format_data(df):
df = pd.melt(df, id_vars=IAMC_IDX + extra_cols, var_name=time_col,
value_vars=sorted(melt_cols), value_name='value')

# cast time_col to correct format
if time_col == 'year':
if not df.year.dtype == 'int64':
df['year'] = cast_years_to_int(pd.to_numeric(df['year']))
if time_col == 'time':
df['time'] = pd.to_datetime(df['time'])

# cast value columns to numeric, drop NaN's, sort data
df['value'] = df['value'].astype('float64')
df.dropna(inplace=True)
Expand Down
44 changes: 44 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,50 @@ def test_init_df_with_extra_col(test_pd_df):
tdf, check_like=True)


@pytest.mark.xfail(reason=(
    "pandas datetime is limited to ~584 year timespan, see "
    "https://stackoverflow.com/a/37226672"
))
def test_init_df_long_timespan(test_pd_df):
    """Initialise an IamDataFrame whose time columns span 1000 years.

    Marked as an expected failure: the default datetime conversion goes
    through pandas, which cannot represent a span this long.
    """
    tdf = test_pd_df.copy()
    tmin = datetime.datetime(2005, 6, 17)
    tmax = datetime.datetime(3005, 6, 17)
    # swap the integer year columns for datetime columns
    tdf = tdf.rename(
        {
            2005: tmin,
            2010: tmax,
        },
        axis="columns"
    )

    df = IamDataFrame(tdf)

    assert df["time"].max() == tmax
    assert df["time"].min() == tmin



def test_subclass_passesinit_df_long_timespan(test_pd_df):
    """A subclass that skips datetime conversion can hold a 1000-year span.

    Overriding ``_format_datetime_col`` to return the data unchanged keeps
    the original ``datetime.datetime`` values, so the min/max checks below
    compare against them directly.
    """
    class TempSubClass(IamDataFrame):
        def _format_datetime_col(self, df):
            # leave the time column exactly as provided
            return df

    tdf = test_pd_df.copy()
    tmin = datetime.datetime(2005, 6, 17)
    tmax = datetime.datetime(3005, 6, 17)
    # swap the integer year columns for datetime columns
    tdf = tdf.rename(
        {
            2005: tmin,
            2010: tmax,
        },
        axis="columns"
    )

    df = TempSubClass(tdf)

    assert df["time"].max() == tmax
    assert df["time"].min() == tmin


def test_to_excel(test_df):
fname = 'foo_testing.xlsx'
Expand Down