Skip to content

Commit

Permalink
Update logging (#288)
Browse files Browse the repository at this point in the history
* Add failing test

* Namespace logging

* RELEASE NOTES

* Clean up
  • Loading branch information
znicholls authored and gidden committed Nov 12, 2019
1 parent d091746 commit 103b60f
Show file tree
Hide file tree
Showing 12 changed files with 92 additions and 73 deletions.
4 changes: 3 additions & 1 deletion RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@

# Next Release

- [#288](https://github.com/IAMconsortium/pyam/pull/288) Put `pyam` logger in its own namespace (see [here](https://docs.python-guide.org/writing/logging/#logging-in-a-library))

# Release v0.3.0

## Highlights
Expand All @@ -23,7 +25,7 @@
- [#243](https://github.com/IAMconsortium/pyam/pull/243) Update `pyam.iiasa.Connection` to support all public and private database connections. DEPRECATED: the argument 'iamc15' has been deprecated in favor of names as queryable directly from the REST API.
- [#241](https://github.com/IAMconsortium/pyam/pull/241) Add `set_meta_from_data` feature
- [#236](https://github.com/IAMconsortium/pyam/pull/236) Add `swap_time_for_year` method and confirm datetime column is compatible with pyam features
- [#273](https://github.com/IAMconsortium/pyam/pull/273) Fix several issues accessing IXMP API (passing correct credentials, improve reliability for optional fields in result payload)
- [#273](https://github.com/IAMconsortium/pyam/pull/273) Fix several issues accessing IXMP API (passing correct credentials, improve reliability for optional fields in result payload)

# Release v0.2.0

Expand Down
6 changes: 5 additions & 1 deletion pyam/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging

from pyam.core import *
from pyam.utils import *
from pyam.statistics import *
from pyam.timeseries import *
from pyam.read_ixmp import *
from pyam.logger import *
from pyam.logging import *
from pyam.run_control import *
from pyam.iiasa import read_iiasa # noqa: F401

Expand All @@ -23,3 +25,5 @@ def custom_formatwarning(msg, category, filename, lineno, line=''):
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions

logging.getLogger(__name__).addHandler(logging.NullHandler())
37 changes: 20 additions & 17 deletions pyam/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import copy
import importlib
import itertools
import logging
import os
import sys

Expand All @@ -15,7 +16,7 @@

from pyam import plotting

from pyam.logger import logger, adjust_log_level
from pyam.logging import adjust_log_level
from pyam.run_control import run_control
from pyam.utils import (
write_sheet,
Expand All @@ -41,6 +42,8 @@
from pyam.read_ixmp import read_ix
from pyam.timeseries import fill_series

logger = logging.getLogger(__name__)


class IamDataFrame(object):
"""This class is a wrapper for dataframes following the IAMC format.
Expand Down Expand Up @@ -512,14 +515,14 @@ def categorize(self, name, value, criteria,
idx = _meta_idx(rows)

if len(idx) == 0:
logger().info("No scenarios satisfy the criteria")
logger.info("No scenarios satisfy the criteria")
return # EXIT FUNCTION

# update metadata dataframe
self._new_meta_column(name)
self.meta.loc[idx, name] = value
msg = '{} scenario{} categorized as `{}: {}`'
logger().info(msg.format(len(idx), '' if len(idx) == 1 else 's',
logger.info(msg.format(len(idx), '' if len(idx) == 1 else 's',
name, value))

def _new_meta_column(self, name):
Expand Down Expand Up @@ -555,7 +558,7 @@ def require_variable(self, variable, unit=None, year=None,

n = len(idx)
if n == 0:
logger().info('All scenarios have the required variable `{}`'
logger.info('All scenarios have the required variable `{}`'
.format(variable))
return

Expand All @@ -566,7 +569,7 @@ def require_variable(self, variable, unit=None, year=None,
self.meta.loc[idx, 'exclude'] = True
msg += ', marked as `exclude: True` in metadata'

logger().info(msg.format(n, variable))
logger.info(msg.format(n, variable))
return pd.DataFrame(index=idx).reset_index()

def validate(self, criteria={}, exclude_on_fail=False):
Expand All @@ -590,7 +593,7 @@ def validate(self, criteria={}, exclude_on_fail=False):

if not df.empty:
msg = '{} of {} data points do not satisfy the criteria'
logger().info(msg.format(len(df), len(self.data)))
logger.info(msg.format(len(df), len(self.data)))

if exclude_on_fail and len(df) > 0:
self._exclude_on_fail(df)
Expand Down Expand Up @@ -751,7 +754,7 @@ def aggregate(self, variable, components=None, append=False):

if not len(components):
msg = 'cannot aggregate variable `{}` because it has no components'
logger().info(msg.format(variable))
logger.info(msg.format(variable))

return

Expand Down Expand Up @@ -796,7 +799,7 @@ def check_aggregate(self, variable, components=None, exclude_on_fail=False,

if len(diff):
msg = '`{}` - {} of {} rows are not aggregates of components'
logger().info(msg.format(variable, len(diff), len(df_variable)))
logger.info(msg.format(variable, len(diff), len(df_variable)))

if exclude_on_fail:
self._exclude_on_fail(diff.index.droplevel([2, 3, 4]))
Expand Down Expand Up @@ -832,7 +835,7 @@ def aggregate_region(self, variable, region='World', subregions=None,
if not len(subregions):
msg = 'cannot aggregate variable `{}` to `{}` because it does not'\
' exist in any subregion'
logger().info(msg.format(variable, region))
logger.info(msg.format(variable, region))

return

Expand All @@ -843,7 +846,7 @@ def aggregate_region(self, variable, region='World', subregions=None,

# add components at the `region` level, defaults to all variables one
# level below `variable` that are only present in `region`
with adjust_log_level():
with adjust_log_level(logger):
region_df = self.filter(region=region)

rdf_comps = region_df._variable_components(variable, level=None)
Expand Down Expand Up @@ -891,7 +894,7 @@ def check_aggregate_region(self, variable, region='World', subregions=None,
rows = self._apply_filters(region=region, variable=variable)
if not rows.any():
msg = 'variable `{}` does not exist in region `{}`'
logger().info(msg.format(variable, region))
logger.info(msg.format(variable, region))
return

df_region, df_subregions = (
Expand All @@ -906,7 +909,7 @@ def check_aggregate_region(self, variable, region='World', subregions=None,
msg = (
'`{}` - {} of {} rows are not aggregates of subregions'
)
logger().info(msg.format(variable, len(diff), len(df_region)))
logger.info(msg.format(variable, len(diff), len(df_region)))

if exclude_on_fail:
self._exclude_on_fail(diff.index.droplevel([2, 3]))
Expand Down Expand Up @@ -958,7 +961,7 @@ def _exclude_on_fail(self, df):
"""Assign a selection of scenarios as `exclude: True` in meta"""
idx = df if isinstance(df, pd.MultiIndex) else _meta_idx(df)
self.meta.loc[idx, 'exclude'] = True
logger().info('{} non-valid scenario{} will be excluded'
logger.info('{} non-valid scenario{} will be excluded'
.format(len(idx), '' if len(idx) == 1 else 's'))

def filter(self, keep=True, inplace=False, **kwargs):
Expand Down Expand Up @@ -995,7 +998,7 @@ def filter(self, keep=True, inplace=False, **kwargs):

idx = _make_index(ret.data)
if len(idx) == 0:
logger().warning('Filtered IamDataFrame is empty!')
logger.warning('Filtered IamDataFrame is empty!')
ret.meta = ret.meta.loc[idx]
if not inplace:
return ret
Expand Down Expand Up @@ -1177,7 +1180,7 @@ def load_metadata(self, path, *args, **kwargs):
n_invalid = len(df) - len(idx)
if n_invalid > 0:
msg = 'Ignoring {} scenario{} from imported metadata'
logger().info(msg.format(n_invalid, 's' if n_invalid > 1 else ''))
logger.info(msg.format(n_invalid, 's' if n_invalid > 1 else ''))

if idx.empty:
raise ValueError('No valid scenarios in imported metadata file!')
Expand All @@ -1186,7 +1189,7 @@ def load_metadata(self, path, *args, **kwargs):

# Merge in imported metadata
msg = 'Importing metadata for {} scenario{} (for total of {})'
logger().info(msg.format(len(df), 's' if len(df) > 1 else '',
logger.info(msg.format(len(df), 's' if len(df) > 1 else '',
len(self.meta)))

for col in df.columns:
Expand Down Expand Up @@ -1342,7 +1345,7 @@ def map_regions(self, map_col, agg=None, copy_col=None, fname=None,
# find duplicates
where_dup = _map['region'].duplicated(keep=False)
dups = _map[where_dup]
logger().warning("""
logger.warning("""
Duplicate entries found for the following regions.
Mapping will occur only for the most common instance.
{}""".format(dups['region'].unique()))
Expand Down
8 changes: 4 additions & 4 deletions pyam/iiasa.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
from functools32 import lru_cache

from pyam.core import IamDataFrame
from pyam.logger import logger
from pyam.utils import META_IDX, islistable, isstr, pattern_match

logger = logging.getLogger(__name__)
# quiet this fool
logging.getLogger('requests').setLevel(logging.WARNING)

Expand Down Expand Up @@ -139,7 +139,7 @@ def connect(self, name):
# TODO: request the full citation to be added to this metadata instead
# of linking to the about page
about = '/'.join([response[idxs['uiUrl']]['value'], '#', 'about'])
logger().info(_CITE_MSG.format(name, about))
logger.info(_CITE_MSG.format(name, about))

self._connected = name

Expand Down Expand Up @@ -325,13 +325,13 @@ def query(self, **kwargs):
}
data = json.dumps(self._query_post_data(**kwargs))
url = '/'.join([self._base_url, 'runs/bulk/ts'])
logger().debug('Querying timeseries data '
logger.debug('Querying timeseries data '
'from {} with filter {}'.format(url, data))
r = requests.post(url, headers=headers, data=data)
_check_response(r)
# refactor returned json object to be castable to an IamDataFrame
df = pd.read_json(r.content, orient='records')
logger().debug('Response size is {0} bytes, '
logger.debug('Response size is {0} bytes, '
'{1} records'.format(len(r.content), len(df)))
columns = ['model', 'scenario', 'variable', 'unit',
'region', 'year', 'value', 'time', 'meta',
Expand Down
25 changes: 0 additions & 25 deletions pyam/logger.py

This file was deleted.

10 changes: 10 additions & 0 deletions pyam/logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from contextlib import contextmanager


@contextmanager
def adjust_log_level(logger, level='ERROR'):
    """Context manager to temporarily change a logger's level.

    Parameters
    ----------
    logger : logging.Logger
        Logger whose level is adjusted for the duration of the block.
    level : str or int, optional
        Level to apply inside the context (default: 'ERROR').

    Notes
    -----
    The previous effective level is restored on exit even if the body
    raises; without the ``try/finally`` an exception would leave the
    logger permanently set to *level*.
    """
    old_level = logger.getEffectiveLevel()
    logger.setLevel(level)
    try:
        yield
    finally:
        logger.setLevel(old_level)
6 changes: 4 additions & 2 deletions pyam/plotting.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import itertools
import logging
import warnings

import matplotlib.pyplot as plt
Expand All @@ -16,14 +17,15 @@
except ImportError:
from functools32 import lru_cache

from pyam.logger import logger
from pyam.run_control import run_control
from pyam.utils import requires_package, IAMC_IDX, SORT_IDX, isstr
# TODO: this is a hotfix for changes in pandas 0.25.0, per discussions on the
# pandas-dev listserv, we should try to ask if matplotlib would make it a
# standard feature in their library
from pyam._style import _get_standard_colors

logger = logging.getLogger(__name__)

# line colors, markers, and styles that are cycled through when not
# explicitly declared
_DEFAULT_PROPS = None
Expand Down Expand Up @@ -805,7 +807,7 @@ def line_plot(df, x='year', y='value', ax=None, legend=None, title=True,

def _add_legend(ax, handles, labels, legend):
if legend is None and len(labels) >= MAX_LEGEND_LABELS:
logger().info('>={} labels, not applying legend'.format(
logger.info('>={} labels, not applying legend'.format(
MAX_LEGEND_LABELS))
else:
legend = {} if legend in [True, None] else legend
Expand Down
9 changes: 5 additions & 4 deletions pyam/timeseries.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# -*- coding: utf-8 -*-

import logging
import numpy as np
from pyam.logger import logger
from pyam.utils import isstr, to_int

logger = logging.getLogger(__name__)

# %%


Expand Down Expand Up @@ -51,11 +52,11 @@ def cumulative(x, first_year, last_year):
# if the timeseries does not cover the range `[first_year, last_year]`,
# return nan to avoid erroneous aggregation
if min(x.index) > first_year:
logger().warning('the timeseries `{}` does not start by {}'.format(
logger.warning('the timeseries `{}` does not start by {}'.format(
x.name or x, first_year))
return np.nan
if max(x.index) < last_year:
logger().warning('the timeseries `{}` does not extend until {}'
logger.warning('the timeseries `{}` does not extend until {}'
.format(x.name or x, last_year))
return np.nan

Expand Down
7 changes: 4 additions & 3 deletions pyam/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import itertools
import logging
import string
import six
import re
Expand All @@ -17,7 +18,7 @@
except ImportError:
pass

from pyam.logger import logger
logger = logging.getLogger(__name__)

# common indicies
META_IDX = ['model', 'scenario']
Expand Down Expand Up @@ -117,7 +118,7 @@ def read_file(fname, *args, **kwargs):
if not isstr(fname):
raise ValueError('reading multiple files not supported, '
'please use `pyam.IamDataFrame.append()`')
logger().info('Reading `{}`'.format(fname))
logger.info('Reading `{}`'.format(fname))
format_kwargs = {}
# extract kwargs that are intended for `format_data`
for c in [i for i in IAMC_IDX + ['year', 'time', 'value'] if i in kwargs]:
Expand Down Expand Up @@ -185,7 +186,7 @@ def convert_r_columns(c):
df.rename(columns={c: str(c).lower() for c in str_cols}, inplace=True)

if 'notes' in df.columns: # this came from the database
logger().info('Ignoring notes column in dataframe')
logger.info('Ignoring notes column in dataframe')
df.drop(columns='notes', inplace=True)
col = df.columns[0] # first column has database copyright notice
df = df[~df[col].str.contains('database', case=False)]
Expand Down
3 changes: 2 additions & 1 deletion tests/test_feature_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def test_check_internal_consistency_no_world_for_variable(
test_df = check_aggregate_df.filter(
variable='Emissions|CH4', region='World', keep=False
)
caplog.set_level(logging.INFO)
caplog.set_level(logging.INFO, logger="pyam.core")
test_df.check_internal_consistency()
warn_idx = caplog.messages.index("variable `Emissions|CH4` does not exist "
"in region `World`")
Expand Down Expand Up @@ -306,6 +306,7 @@ def test_aggregate_region_components_handling(check_aggregate_regional_df,

def test_check_aggregate_region_no_world(check_aggregate_regional_df, caplog):
test_df = check_aggregate_regional_df.filter(region='World', keep=False)
caplog.set_level(logging.INFO, logger="pyam.core")
test_df.check_aggregate_region('Emissions|N2O', region='World')
warn_idx = caplog.messages.index("variable `Emissions|N2O` does not exist "
"in region `World`")
Expand Down
Loading

0 comments on commit 103b60f

Please sign in to comment.