hotfix for pyam aggregation and filter in the wild (#254)

* add test that fails
* test now passes
* another test that shows bitwise-or failure
* fix issue with filtering empty dataframes
* additional tests for behavior observed by @zikolach
* stickler
* add to release notes
* Fix tests according to suggestions
IAMconsortium · Aug 20, 2019 · d2fa051 · d2fa051
1 parent 935864d
commit d2fa051
Show file tree

Hide file tree

Showing 5 changed files with 61 additions and 4 deletions.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -2,6 +2,7 @@
 # Next Release
 
 - [#261](https://github.com/IAMconsortium/pyam/pull/261) Add a check that `keep` in `filter()` is a boolean
+- [#254](https://github.com/IAMconsortium/pyam/pull/254) Hotfix for aggregating missing regions and filtering empty dataframes
 - [#243](https://github.com/IAMconsortium/pyam/pull/243) Update `pyam.iiasa.Connection` to support all public and private database connections. DEPRECATED: the argument 'iamc15' has been deprecated in favor of names as queryable directly from the REST API.
 - [#241](https://github.com/IAMconsortium/pyam/pull/241) Add `set_meta_from_data` feature
 - [#236](https://github.com/IAMconsortium/pyam/pull/236) Add `swap_time_for_year` method and confirm datetime column is compatible with pyam features

diff --git a/pyam/core.py b/pyam/core.py
@@ -1063,7 +1063,7 @@ def _apply_filters(self, **filters):
             else:
                 _raise_filter_error(col)
 
-            keep &= keep_col
+            keep = np.logical_and(keep, keep_col)
 
         return keep
 

diff --git a/pyam/utils.py b/pyam/utils.py
@@ -323,9 +323,9 @@ def pattern_match(data, values, level=None, regexp=False, has_nan=True):
             pattern = re.compile(_escape_regexp(s) + '$' if not regexp else s)
             subset = filter(pattern.match, _data)
             depth = True if level is None else find_depth(_data, s, level)
-            matches |= (_data.isin(subset) & depth)
+            matches = np.logical_or(matches, _data.isin(subset) & depth)
         else:
-            matches |= data == s
+            matches = np.logical_or(matches, data == s)
     return matches
 
 

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -182,6 +182,15 @@ def test_variable_unit(test_df):
     npt.assert_array_equal(test_df.variables(include_units=True), exp)
 
 
+def test_filter_empty_df():
+    # test for issue seen in #254
+    cols = IAMC_IDX + [2005, 2010]
+    data = pd.DataFrame([], columns=cols)
+    df = IamDataFrame(data=data)
+    obs = df.filter(variable='foo')
+    assert len(obs) == 0
+
+
 def test_filter_variable_and_depth(test_df):
     obs = list(test_df.filter(variable='*rimary*C*', level=0).variables())
     exp = ['Primary Energy|Coal']

diff --git a/tests/test_feature_aggregate.py b/tests/test_feature_aggregate.py
@@ -1,9 +1,56 @@
 import numpy as np
 import pandas as pd
-from pyam import check_aggregate, IAMC_IDX
+from pyam import check_aggregate, IamDataFrame, IAMC_IDX
 
 from conftest import TEST_DTS
 
+
+def test_missing_region(check_aggregate_df):
+    # for now, this test makes sure that this operation works as expected
+    exp = check_aggregate_df.aggregate_region('Primary Energy', region='foo')
+    assert len(exp) == 8
+    # # this test should be updated to the below after the return type of
+    # # aggregate_region() is updated
+    # exp = check_aggregate_df.aggregate_region(
+    #     'Primary Energy', region='foo', append=False
+    # ).data
+    # check_aggregate_df.aggregate_region(
+    #     'Primary Energy', region='foo', append=True
+    # )
+    # obs = check_aggregate_df.filter(region='foo').data
+    # assert len(exp) > 0
+    # pd.testing.assert_frame_equal(obs.reset_index(drop=True),
+    #                               exp.reset_index(drop=True))
+
+
+def test_aggregate_region_extra_subregion():
+    cols = ['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010]
+    data = pd.DataFrame([
+        ['model_a', 'scen_a', 'foo', 'Primary Energy', 'EJ/y', 1, 6],
+        ['model_a', 'scen_a', 'bar', 'Primary Energy', 'EJ/y', 0.75, 5]],
+        columns=cols)
+    df = IamDataFrame(data=data)
+    obs = df.aggregate_region(variable='Primary Energy',
+                              region='R5ASIA',
+                              subregions=['foo', 'bar', 'baz'],
+                              components=[], append=False)
+    assert len(obs) == 2
+
+
+def test_aggregate_region_missing_all_subregions():
+    cols = ['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010]
+    data = pd.DataFrame([
+        ['model_a', 'scen_a', 'foo', 'Primary Energy', 'EJ/y', 1, 6],
+        ['model_a', 'scen_a', 'bar', 'Primary Energy', 'EJ/y', 0.75, 5]],
+        columns=cols)
+    df = IamDataFrame(data=data)
+    obs = df.aggregate_region(variable='Primary Energy',
+                              region='R5ASIA',
+                              subregions=['China', 'Vietnam', 'Japan']
+                              )
+    assert len(obs) == 0
+
+
 def test_do_aggregate_append(meta_df):
     meta_df.rename({'variable': {'Primary Energy': 'Primary Energy|Gas'}},
                    inplace=True)