From 649723aa16a8ea839d66dbe58f66264661474f02 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Fri, 22 Oct 2021 02:17:00 -0500 Subject: [PATCH] feat: Allow zero rate Poisson (#1657) * Effectively reverts most of PR #1001 and PR #280, reapplies most of PR #277 * Use scipy.special.xlogy in Poisson computation for numpy backend and use jax.scipy.special.xlogy for jax backend * Set minimum required PyTorch to v1.10 for API stability - c.f. https://github.com/pytorch/pytorch/pull/61511 in torch v1.10.0 * Set minimum required TensorFlow to v2.3.1 and TensorFlow Probability to v0.11.0 - tfp v0.11.0 supports zero rate Poisson and requires tensorflow>=2.3.0 * Add note to docs that limit Poisson(n = 0 | lambda -> 0) = 1 is being used * Update tests to use limit Poisson(n = 0 | lambda -> 0) = 1 result * Run doctest on only the latest Python release Co-authored-by: Ruggero Turra --- .github/workflows/ci.yml | 1 + .../workflows/lower-bound-requirements.yml | 4 +- lower-bound-requirements.txt | 6 +-- setup.py | 6 +-- src/pyhf/tensor/jax_backend.py | 20 ++++++++-- src/pyhf/tensor/numpy_backend.py | 20 ++++++++-- src/pyhf/tensor/pytorch_backend.py | 14 +++++++ src/pyhf/tensor/tensorflow_backend.py | 37 ++++++++----------- tests/test_tensor.py | 18 +++------ 9 files changed, 80 insertions(+), 46 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 08a420a8f5..3daa327055 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,6 +68,7 @@ jobs: flags: contrib - name: Test docstring examples with doctest + if: matrix.python-version == 3.9 run: pytest -r sx src/ README.rst - name: Report doctest coverage with Codecov diff --git a/.github/workflows/lower-bound-requirements.yml b/.github/workflows/lower-bound-requirements.yml index 3df57054f6..a24995be9a 100644 --- a/.github/workflows/lower-bound-requirements.yml +++ b/.github/workflows/lower-bound-requirements.yml @@ -30,7 +30,9 @@ jobs: python -m pip --no-cache-dir --quiet install --requirement lower-bound-requirements.txt python -m pip --no-cache-dir --quiet install .[test] python -m pip install --requirement lower-bound-requirements.txt - python -m pip list + + - name: List installed Python packages + run: python -m pip list - name: Test with pytest run: | diff --git a/lower-bound-requirements.txt b/lower-bound-requirements.txt index 8bdb7c2878..61fd6f7971 100644 --- a/lower-bound-requirements.txt +++ b/lower-bound-requirements.txt @@ -10,10 +10,10 @@ uproot==4.1.1 # minuit iminuit==2.4.0 # tensorflow -tensorflow==2.2.1 # c.f. PR #1001 -tensorflow-probability==0.10.1 +tensorflow==2.3.1 # tensorflow-probability v0.11.0 requires tensorflow>=2.3 +tensorflow-probability==0.11.0 # c.f. PR #1657 # torch -torch==1.8.0 +torch==1.10.0 # jax # Use Google Cloud Storage buckets for long term wheel support # c.f. https://github.com/google/jax/discussions/7608#discussioncomment-1269342 diff --git a/setup.py b/setup.py index f4ba5da504..7e64b5ac15 100644 --- a/setup.py +++ b/setup.py @@ -3,10 +3,10 @@ extras_require = { 'shellcomplete': ['click_completion'], 'tensorflow': [ - 'tensorflow~=2.2,>=2.2.1,!=2.3.0', # c.f. https://github.com/tensorflow/tensorflow/pull/40789 - 'tensorflow-probability~=0.10,>=0.10.1', + 'tensorflow~=2.3,!=2.3.0', # c.f. https://github.com/tensorflow/tensorflow/pull/40789 + 'tensorflow-probability~=0.11', ], - 'torch': ['torch~=1.8'], + 'torch': ['torch~=1.10'], 'jax': ['jax~=0.2.8', 'jaxlib~=0.1.58,!=0.1.68'], # c.f. Issue 1501 'xmlio': ['uproot>=4.1.1'], 'minuit': ['iminuit>=2.4'], diff --git a/src/pyhf/tensor/jax_backend.py b/src/pyhf/tensor/jax_backend.py index f5867ded10..99b25e3967 100644 --- a/src/pyhf/tensor/jax_backend.py +++ b/src/pyhf/tensor/jax_backend.py @@ -3,7 +3,7 @@ config.update('jax_enable_x64', True) import jax.numpy as jnp -from jax.scipy.special import gammaln +from jax.scipy.special import gammaln, xlogy from jax.scipy import special from jax.scipy.stats import norm import numpy as np @@ -368,7 +368,7 @@ def einsum(self, subscripts, *operands): def poisson_logpdf(self, n, lam): n = jnp.asarray(n) lam = jnp.asarray(lam) - return n * jnp.log(lam) - lam - gammaln(n + 1.0) + return xlogy(n, lam) - lam - gammaln(n + 1.0) def poisson(self, n, lam): r""" @@ -376,6 +376,20 @@ def poisson(self, n, lam): to the probability mass function of the Poisson distribution evaluated at :code:`n` given the parameter :code:`lam`. + .. note:: + + Though the p.m.f of the Poisson distribution is not defined for + :math:`\lambda = 0`, the limit as :math:`\lambda \to 0` is still + defined, which gives a degenerate p.m.f. of + + .. math:: + + \lim_{\lambda \to 0} \,\mathrm{Pois}(n | \lambda) = + \left\{\begin{array}{ll} + 1, & n = 0,\\ + 0, & n > 0 + \end{array}\right. + Example: >>> import pyhf @@ -398,7 +412,7 @@ def poisson(self, n, lam): """ n = jnp.asarray(n) lam = jnp.asarray(lam) - return jnp.exp(n * jnp.log(lam) - lam - gammaln(n + 1.0)) + return jnp.exp(xlogy(n, lam) - lam - gammaln(n + 1.0)) def normal_logpdf(self, x, mu, sigma): # this is much faster than diff --git a/src/pyhf/tensor/numpy_backend.py b/src/pyhf/tensor/numpy_backend.py index f3601b17a5..f23a4ce9b8 100644 --- a/src/pyhf/tensor/numpy_backend.py +++ b/src/pyhf/tensor/numpy_backend.py @@ -1,7 +1,7 @@ """NumPy Tensor Library Module.""" import numpy as np import logging -from scipy.special import gammaln +from scipy.special import gammaln, xlogy from scipy import special from scipy.stats import norm, poisson @@ -349,7 +349,7 @@ def einsum(self, subscripts, *operands): return np.einsum(subscripts, *operands) def poisson_logpdf(self, n, lam): - return n * np.log(lam) - lam - gammaln(n + 1.0) + return xlogy(n, lam) - lam - gammaln(n + 1.0) def poisson(self, n, lam): r""" @@ -357,6 +357,20 @@ def poisson(self, n, lam): to the probability mass function of the Poisson distribution evaluated at :code:`n` given the parameter :code:`lam`. + .. note:: + + Though the p.m.f of the Poisson distribution is not defined for + :math:`\lambda = 0`, the limit as :math:`\lambda \to 0` is still + defined, which gives a degenerate p.m.f. of + + .. math:: + + \lim_{\lambda \to 0} \,\mathrm{Pois}(n | \lambda) = + \left\{\begin{array}{ll} + 1, & n = 0,\\ + 0, & n > 0 + \end{array}\right. + Example: >>> import pyhf @@ -379,7 +393,7 @@ def poisson(self, n, lam): """ n = np.asarray(n) lam = np.asarray(lam) - return np.exp(n * np.log(lam) - lam - gammaln(n + 1.0)) + return np.exp(xlogy(n, lam) - lam - gammaln(n + 1.0)) def normal_logpdf(self, x, mu, sigma): # this is much faster than diff --git a/src/pyhf/tensor/pytorch_backend.py b/src/pyhf/tensor/pytorch_backend.py index 0b22ea8275..398a30eabf 100644 --- a/src/pyhf/tensor/pytorch_backend.py +++ b/src/pyhf/tensor/pytorch_backend.py @@ -367,6 +367,20 @@ def poisson(self, n, lam): to the probability mass function of the Poisson distribution evaluated at :code:`n` given the parameter :code:`lam`. + .. note:: + + Though the p.m.f of the Poisson distribution is not defined for + :math:`\lambda = 0`, the limit as :math:`\lambda \to 0` is still + defined, which gives a degenerate p.m.f. of + + .. math:: + + \lim_{\lambda \to 0} \,\mathrm{Pois}(n | \lambda) = + \left\{\begin{array}{ll} + 1, & n = 0,\\ + 0, & n > 0 + \end{array}\right. + Example: >>> import pyhf diff --git a/src/pyhf/tensor/tensorflow_backend.py b/src/pyhf/tensor/tensorflow_backend.py index 1d58d7d20b..3f34a1fc88 100644 --- a/src/pyhf/tensor/tensorflow_backend.py +++ b/src/pyhf/tensor/tensorflow_backend.py @@ -2,7 +2,6 @@ import logging import tensorflow as tf import tensorflow_probability as tfp -from numpy import nan log = logging.getLogger(__name__) @@ -427,15 +426,7 @@ def poisson_logpdf(self, n, lam): TensorFlow Tensor: Value of the continuous approximation to log(Poisson(n|lam)) """ lam = self.astensor(lam) - # Guard against Poisson(n=0 | lam=0) - # c.f. https://github.com/scikit-hep/pyhf/issues/293 - valid_obs_given_rate = tf.logical_or( - tf.math.not_equal(lam, n), tf.math.not_equal(n, 0) - ) - - return tf.where( - valid_obs_given_rate, tfp.distributions.Poisson(lam).log_prob(n), nan - ) + return tfp.distributions.Poisson(lam).log_prob(n) def poisson(self, n, lam): r""" @@ -443,6 +434,20 @@ def poisson(self, n, lam): to the probability mass function of the Poisson distribution evaluated at :code:`n` given the parameter :code:`lam`. + .. note:: + + Though the p.m.f of the Poisson distribution is not defined for + :math:`\lambda = 0`, the limit as :math:`\lambda \to 0` is still + defined, which gives a degenerate p.m.f. of + + .. math:: + + \lim_{\lambda \to 0} \,\mathrm{Pois}(n | \lambda) = + \left\{\begin{array}{ll} + 1, & n = 0,\\ + 0, & n > 0 + \end{array}\right. + Example: >>> import pyhf >>> pyhf.set_backend("tensorflow") @@ -465,17 +470,7 @@ def poisson(self, n, lam): TensorFlow Tensor: Value of the continuous approximation to Poisson(n|lam) """ lam = self.astensor(lam) - # Guard against Poisson(n=0 | lam=0) - # c.f. https://github.com/scikit-hep/pyhf/issues/293 - valid_obs_given_rate = tf.logical_or( - tf.math.not_equal(lam, n), tf.math.not_equal(n, 0) - ) - - return tf.where( - valid_obs_given_rate, - tf.exp(tfp.distributions.Poisson(lam).log_prob(n)), - nan, - ) + return tf.exp(tfp.distributions.Poisson(lam).log_prob(n)) def normal_logpdf(self, x, mu, sigma): r""" diff --git a/tests/test_tensor.py b/tests/test_tensor.py index 81a01f14c0..c17198a3f7 100644 --- a/tests/test_tensor.py +++ b/tests/test_tensor.py @@ -287,17 +287,14 @@ def test_pdf_calculations(backend): ], nan_ok=True, ) - # poisson(lambda=0) is not defined, should return NaN + # Allow poisson(lambda=0) under limit Poisson(n = 0 | lambda -> 0) = 1 assert tb.tolist( tb.poisson(tb.astensor([0, 0, 1, 1]), tb.astensor([0, 1, 0, 1])) - ) == pytest.approx( - [np.nan, 0.3678794503211975, 0.0, 0.3678794503211975], nan_ok=True - ) + ) == pytest.approx([1.0, 0.3678794503211975, 0.0, 0.3678794503211975]) assert tb.tolist( tb.poisson_logpdf(tb.astensor([0, 0, 1, 1]), tb.astensor([0, 1, 0, 1])) ) == pytest.approx( - np.log([np.nan, 0.3678794503211975, 0.0, 0.3678794503211975]).tolist(), - nan_ok=True, + np.log([1.0, 0.3678794503211975, 0.0, 0.3678794503211975]).tolist() ) # Ensure continuous approximation is valid @@ -333,17 +330,14 @@ def test_pdf_calculations_pytorch(backend): ], ) - # poisson(lambda=0) is not defined, should return NaN + # Allow poisson(lambda=0) under limit Poisson(n = 0 | lambda -> 0) = 1 assert tb.tolist( tb.poisson(tb.astensor([0, 0, 1, 1]), tb.astensor([0, 1, 0, 1])) - ) == pytest.approx( - [np.nan, 0.3678794503211975, 0.0, 0.3678794503211975], nan_ok=True - ) + ) == pytest.approx([1.0, 0.3678794503211975, 0.0, 0.3678794503211975]) assert tb.tolist( tb.poisson_logpdf(tb.astensor([0, 0, 1, 1]), tb.astensor([0, 1, 0, 1])) ) == pytest.approx( - np.log([np.nan, 0.3678794503211975, 0.0, 0.3678794503211975]).tolist(), - nan_ok=True, + np.log([1.0, 0.3678794503211975, 0.0, 0.3678794503211975]).tolist() ) # Ensure continuous approximation is valid