Skip to content

Commit

Permalink
hotfix for pyam aggregation and filter in the wild (#254)
Browse files Browse the repository at this point in the history
* add test that fails

* test now passes

* another test that shows bitwise-or failure

* fix issue with filtering empty dataframes

* additional tests for behavior observed by @zikolach

* stickler

* add to release notes

* Fix tests according to suggestions
  • Loading branch information
gidden authored Aug 20, 2019
1 parent 935864d commit d2fa051
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 4 deletions.
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Next Release

- [#261](https://github.com/IAMconsortium/pyam/pull/261) Add a check that `keep` in `filter()` is a boolean
- [#254](https://github.com/IAMconsortium/pyam/pull/254) Hotfix for aggregating missing regions and filtering empty dataframes
- [#243](https://github.com/IAMconsortium/pyam/pull/243) Update `pyam.iiasa.Connection` to support all public and private database connections. DEPRECATED: the argument 'iamc15' has been deprecated in favor of names as queryable directly from the REST API.
- [#241](https://github.com/IAMconsortium/pyam/pull/241) Add `set_meta_from_data` feature
- [#236](https://github.com/IAMconsortium/pyam/pull/236) Add `swap_time_for_year` method and confirm datetime column is compatible with pyam features
Expand Down
2 changes: 1 addition & 1 deletion pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1063,7 +1063,7 @@ def _apply_filters(self, **filters):
else:
_raise_filter_error(col)

keep &= keep_col
keep = np.logical_and(keep, keep_col)

return keep

Expand Down
4 changes: 2 additions & 2 deletions pyam/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,9 +323,9 @@ def pattern_match(data, values, level=None, regexp=False, has_nan=True):
pattern = re.compile(_escape_regexp(s) + '$' if not regexp else s)
subset = filter(pattern.match, _data)
depth = True if level is None else find_depth(_data, s, level)
matches |= (_data.isin(subset) & depth)
matches = np.logical_or(matches, _data.isin(subset) & depth)
else:
matches |= data == s
matches = np.logical_or(matches, data == s)
return matches


Expand Down
9 changes: 9 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,15 @@ def test_variable_unit(test_df):
npt.assert_array_equal(test_df.variables(include_units=True), exp)


def test_filter_empty_df():
    """Regression test for #254: filter an IamDataFrame that has no rows."""
    # build a frame that has the IAMC index columns and two year columns,
    # but no data at all
    empty = pd.DataFrame([], columns=IAMC_IDX + [2005, 2010])
    filtered = IamDataFrame(data=empty).filter(variable='foo')
    assert len(filtered) == 0


def test_filter_variable_and_depth(test_df):
obs = list(test_df.filter(variable='*rimary*C*', level=0).variables())
exp = ['Primary Energy|Coal']
Expand Down
49 changes: 48 additions & 1 deletion tests/test_feature_aggregate.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,56 @@
import numpy as np
import pandas as pd
from pyam import check_aggregate, IAMC_IDX
from pyam import check_aggregate, IamDataFrame, IAMC_IDX

from conftest import TEST_DTS


def test_missing_region(check_aggregate_df):
    """Aggregate 'Primary Energy' to a region named 'foo' and check length."""
    # for now, only verify that the operation runs and returns the
    # expected number of entries
    aggregated = check_aggregate_df.aggregate_region(
        'Primary Energy', region='foo'
    )
    assert len(aggregated) == 8
    # TODO: once the return type of aggregate_region() is updated, this
    # test should be replaced by the following:
    #
    #     exp = check_aggregate_df.aggregate_region(
    #         'Primary Energy', region='foo', append=False
    #     ).data
    #     check_aggregate_df.aggregate_region(
    #         'Primary Energy', region='foo', append=True
    #     )
    #     obs = check_aggregate_df.filter(region='foo').data
    #     assert len(exp) > 0
    #     pd.testing.assert_frame_equal(obs.reset_index(drop=True),
    #                                   exp.reset_index(drop=True))


def test_aggregate_region_extra_subregion():
    """Aggregate over subregions where one ('baz') is absent from the data."""
    header = ['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010]
    rows = [
        ['model_a', 'scen_a', 'foo', 'Primary Energy', 'EJ/y', 1, 6],
        ['model_a', 'scen_a', 'bar', 'Primary Energy', 'EJ/y', 0.75, 5],
    ]
    frame = IamDataFrame(data=pd.DataFrame(rows, columns=header))

    # 'foo' and 'bar' exist in the data, 'baz' does not
    aggregated = frame.aggregate_region(
        variable='Primary Energy',
        region='R5ASIA',
        subregions=['foo', 'bar', 'baz'],
        components=[],
        append=False,
    )
    assert len(aggregated) == 2


def test_aggregate_region_missing_all_subregions():
    """Aggregate over subregions of which none appears in the data."""
    header = ['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010]
    rows = [
        ['model_a', 'scen_a', 'foo', 'Primary Energy', 'EJ/y', 1, 6],
        ['model_a', 'scen_a', 'bar', 'Primary Energy', 'EJ/y', 0.75, 5],
    ]
    frame = IamDataFrame(data=pd.DataFrame(rows, columns=header))

    # the data only holds regions 'foo' and 'bar', so none of the
    # requested subregions can be found
    aggregated = frame.aggregate_region(
        variable='Primary Energy',
        region='R5ASIA',
        subregions=['China', 'Vietnam', 'Japan'],
    )
    assert len(aggregated) == 0


def test_do_aggregate_append(meta_df):
meta_df.rename({'variable': {'Primary Energy': 'Primary Energy|Gas'}},
inplace=True)
Expand Down

0 comments on commit d2fa051

Please sign in to comment.