diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml new file mode 100644 index 0000000..eecf0d1 --- /dev/null +++ b/.github/workflows/draft-pdf.yml @@ -0,0 +1,23 @@ +on: [push] + +jobs: + paper: + runs-on: ubuntu-latest + name: Paper Draft + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build draft PDF + uses: openjournals/openjournals-draft-action@master + with: + journal: joss + # This should be the path to the paper within your repo. + paper-path: paper/paper.md + - name: Upload + uses: actions/upload-artifact@v1 + with: + name: paper + # This is the output path where Pandoc will write the compiled + # PDF. Note, this should be the same directory as the input + # paper.md + path: paper/paper.pdf diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index d4b1b53..7ce47a6 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.11"] steps: - uses: actions/checkout@v3 @@ -19,11 +19,12 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pylint pytest pytest-cov hypothesis - pip install .[pandas] - - name: Lint with pylint + pip install pre-commit pytest pytest-cov hypothesis + pre-commit install + pip install .[CI] + - name: Run pre-commit checks run: | - pylint --fail-under=8 auto_uncertainties + pre-commit run --all-files - name: Test with pytest run: | - pytest --cov=auto_uncertainties --cov-report=xml --cov-report=html -k "not pandas" + pytest --cov=auto_uncertainties --cov-report=xml --cov-report=html --ignore=tests/pandas diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8202f34..3314e84 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,34 +1,37 @@ repos: -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.275 - hooks: - - id: ruff - args: [ --fix, --exit-non-zero-on-fix ] - -- repo: https://github.com/psf/black.git - rev: 23.1.0 - hooks: - - id: black - language_version: python3.9 - -- repo: https://github.com/pycqa/isort - rev: 5.12.0 +- repo: local hooks: - - id: isort - name: isort (python) + + - id: generate-init + name: Generates __init__.py files + language: python + entry: python hooks/generate_init.py + always_run: true + require_serial: true + additional_dependencies: ["mkinit", "ruff"] + - id: fix-line-endings + name: Convert CRLF/CR endings to LF + language: python + require_serial: true + entry: python hooks/fix_line_endings.py + types: ["text"] + - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.6.0 hooks: - - id: end-of-file-fixer - - id: fix-encoding-pragma - - id: trailing-whitespace - - id: check-added-large-files - - id: check-ast - - id: check-case-conflict - id: check-executables-have-shebangs - - id: check-merge-conflict - id: check-symlinks - - id: debug-statements - - id: mixed-line-ending + +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.4.6 + hooks: + #Run the formatter. + - id: ruff-format + types_or: [ python, pyi, jupyter ] + #Run the linter. + - id: ruff + types_or: [ python, pyi, jupyter ] + args: [ --fix, --exit-zero ] \ No newline at end of file diff --git a/README.rst b/README.md similarity index 55% rename from README.rst rename to README.md index e0b10cc..cb58f79 100644 --- a/README.rst +++ b/README.md @@ -1,14 +1,3 @@ -.. 
image:: https://img.shields.io/pypi/v/auto-uncertainties.svg - :target: https://pypi.org/project/auto-uncertainties/ - :alt: Latest Version - -.. image:: https://img.shields.io/pypi/l/auto-uncertainties.svg - :target: https://pypi.org/project/auto-uncertainties/ - :alt: License - -.. image:: https://github.com/varchasgopalaswamy/AutoUncertainties/actions/workflows/python-app.yml/badge.svg - :target: https://github.com/varchasgopalaswamy/AutoUncertainties/actions?query=workflow - :alt: Tests AutoUncertainties ======================== @@ -29,18 +18,36 @@ Usage Creating a scalar Uncertainty variable is relatively simple: -.. code-block:: python - +```python >>> from auto_uncertainties import Uncertainty >>> value = 1.0 >>> error = 0.1 >>> u = Uncertainty(value,error) >>> u 1.0 +/- 0.1 +``` as is creating a numpy array of Uncertainties: -.. code-block:: python +```python + + >>> from auto_uncertainties import Uncertainty + >>> import numpy as np + >>> value = np.linspace(start=0,stop=10,num=5) + >>> error = np.ones_like(value)*0.1 + >>> u = Uncertainty(value,error) +``` + +(though, they are actually different classes!) + +```python + + >>> from auto_uncertainties import Uncertainty + >>> value = 1.0 + >>> error = 0.1 + >>> u = Uncertainty(value,error) + >>> type(u) + auto_uncertainties.uncertainty.ScalarUncertainty >>> from auto_uncertainties import Uncertainty >>> import numpy as np @@ -48,21 +55,22 @@ as is creating a numpy array of Uncertainties: >>> error = np.ones_like(value)*0.1 >>> u = Uncertainty(value,error) >>> u - [ 0. 2.5 5. 7.5 10. ] +/- [0.1 0.1 0.1 0.1 0.1] + auto_uncertainties.uncertainty.VectorUncertainty +``` Scalar uncertainties implement all mathematical and logical `dunder methods `_ explicitly. -.. code-block:: python +```python >>> from auto_uncertainties import Uncertainty >>> u = Uncertainty(10.0, 3.0) >>> v = Uncertainty(20.0, 4.0) >>> u + v 30.0 +/- 5.0 - +``` Array uncertainties implement a large subset of the numpy ufuncs and methods using :code:`jax.grad` or :code:`jax.jacfwd`, depending on the output shape. -.. code-block:: python +```python >>> from auto_uncertainties import Uncertainty >>> import numpy as np @@ -70,19 +78,23 @@ Array uncertainties implement a large subset of the numpy ufuncs and methods usi >>> error = np.ones_like(value)*0.1 >>> u = Uncertainty(value,error) >>> np.exp(u) - [1.00000000e+00 1.21824940e+01 1.48413159e+02 1.80804241e+03 - 2.20264658e+04] +/- [1.00000000e-01 1.21824940e+00 1.48413159e+01 1.80804241e+02 - 2.20264658e+03] + Magnitude + + 1, 12.182, 148.413, 1808.04, 22026.5 + + Error + + 0.1, 1.2, 15, 180, 2200 >>> np.sum(u) - 25.0 +/- 0.223606797749979 + 25.0 +/- 0.22 >>> u.sum() - 25.0 +/- 0.223606797749979 + 25.0 +/- 0.22 >>> np.sqrt(np.sum(error**2)) 0.223606797749979 - +``` The mean value and the standard deviation (the measurements are assumed to be normally distributed) can be accessed via -.. code-block:: python +```python >>> from auto_uncertainties import Uncertainty >>> u = Uncertainty(10.0, 3.0) @@ -90,6 +102,37 @@ The mean value and the standard deviation (the measurements are assumed to be no 10.0 >>> u.error 3.0 +``` + +Displayed values are automatically rounded according to the Particle Data Group standard. 
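+The PDG prescription in brief: the three leading digits of the uncertainty determine how many significant digits are kept (100-354 keeps two, 355-949 keeps one, and 950-999 rounds the uncertainty up to the next decade). A hand-computed illustration in plain Python, not library output:
+```python
+
+    >>> # the leading digits of 0.2236... are 223 <= 354, so two digits are kept
+    >>> round(0.223606797749979, 2)
+    0.22
+```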
+This can be turned off using `set_display_rounding`:
+```python
+
+    >>> from auto_uncertainties import set_display_rounding
+    >>> set_display_rounding(False)
+    >>> from auto_uncertainties import Uncertainty
+    >>> import numpy as np
+    >>> value = np.linspace(start=0,stop=10,num=5)
+    >>> error = np.ones_like(value)*0.1
+    >>> u = Uncertainty(value,error)
+    >>> np.sum(u)
+    25.0 +/- 0.223606797749979
+```
+If `np.array` is called on an `Uncertainty` object, it is automatically downcast to a plain numpy array (losing the uncertainty information!) and a `DowncastWarning` is emitted. To make this an error instead, use `set_downcast_error`:
+```python
+
+    >>> from auto_uncertainties import set_downcast_error
+    >>> set_downcast_error(True)
+    >>> from auto_uncertainties import Uncertainty
+    >>> import numpy as np
+    >>> value = np.linspace(start=0,stop=10,num=5)
+    >>> error = np.ones_like(value)*0.1
+    >>> u = Uncertainty(value,error)
+    >>> np.array(u)
+
+    DowncastError: The uncertainty is stripped when downcasting to ndarray.
+```
+
+
 Prerequisites
 ===========
@@ -97,7 +140,7 @@ Prerequisites
 For array support:

 * jax
-* jaxlib (must be built from source if you are not on Linux machine with AVX instruction sets.)
+* jaxlib

 * numpy

diff --git a/auto_uncertainties/__init__.py b/auto_uncertainties/__init__.py
index 4651270..b9d2af6 100644
--- a/auto_uncertainties/__init__.py
+++ b/auto_uncertainties/__init__.py
@@ -1,66 +1,35 @@
-# -*- coding: utf-8 -*-
 from __future__ import annotations

-import numpy as np
-
-
-class NegativeStdDevError(Exception):
-    """An exception for when the standard deviation is negative"""
-
-    pass
-
-
-class NumpyDowncastWarning(RuntimeWarning):
-    """An exception for when an uncertainties array is downcast to a numpy array"""
-
-    pass
-
-
-from .uncertainty import Uncertainty  # noqa: E402
-
-try:
-    from .pandas_compat import UncertaintyArray
-except ImportError:
-    UncertaintyArray = None
-
-
-def nominal_values(x):
-    # Is an Uncertainty
-    if hasattr(x, "_nom"):
-        return x.value
-    else:
-        if np.ndim(x) > 0:
-            try:
-                x2 = Uncertainty.from_sequence(x)
-            except Exception:
-                return x
-            else:
-                return x2.value
-        else:
-            try:
-                x2 = Uncertainty(x)
-            except Exception:
-                return x
-            else:
-                return x2.value
-
-
-def std_devs(x):
-    # Is an Uncertainty
-    if hasattr(x, "_err"):
-        return x.error
-    else:
-        if np.ndim(x) > 0:
-            try:
-                x2 = Uncertainty.from_sequence(x)
-            except Exception:
-                return np.zeros_like(x)
-            else:
-                return x2.error
-        else:
-            try:
-                x2 = Uncertainty(x)
-            except Exception:
-                return 0
-            else:
-                return x2.error
+__private__ = ["util"]
+__protected__ = ["numpy"]
+import lazy_loader
+
+
+__getattr__, __dir__, __all__ = lazy_loader.attach_stub(__name__, __file__)
+
+__all__ = [
+    "DowncastError",
+    "DowncastWarning",
+    "NegativeStdDevError",
+    "ScalarDisplay",
+    "ScalarUncertainty",
+    "Uncertainty",
+    "UncertaintyArray",
+    "UncertaintyDtype",
+    "VectorDisplay",
+    "VectorUncertainty",
+    "display_format",
+    "exceptions",
+    "nominal_values",
+    "numpy",
+    "pandas",
+    "set_compare_error",
+    "set_display_rounding",
+    "set_downcast_error",
+    "std_devs",
+    "unc_array",
+    "unc_dtype",
+    "uncertainty",
+    "uncertainty_containers",
+    "util",
+]
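(Aside: `lazy_loader.attach_stub` reads the adjacent `__init__.pyi` stub to learn the package's public names, and defers the actual submodule imports until first attribute access. A minimal sketch of the effect; the usage below is hypothetical and not part of this diff:)

```python
import auto_uncertainties  # cheap: no submodule has been imported yet

# First attribute access triggers the real import behind the scenes.
u = auto_uncertainties.Uncertainty(1.0, 0.1)
```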
diff --git a/auto_uncertainties/__init__.pyi b/auto_uncertainties/__init__.pyi
new file mode 100644
index 0000000..0d97cfd
--- /dev/null
+++ b/auto_uncertainties/__init__.pyi
@@ -0,0 +1,60 @@
+from . import display_format
+from . import exceptions
+from . import numpy
+from . import pandas
+from . import uncertainty
+from . import util
+
+from .display_format import (
+    ScalarDisplay,
+    VectorDisplay,
+    set_display_rounding,
+)
+from .exceptions import (
+    DowncastError,
+    DowncastWarning,
+    NegativeStdDevError,
+)
+from .pandas import (
+    UncertaintyArray,
+    UncertaintyDtype,
+    unc_array,
+    unc_dtype,
+)
+from .uncertainty import (
+    ScalarUncertainty,
+    Uncertainty,
+    VectorUncertainty,
+    nominal_values,
+    set_compare_error,
+    set_downcast_error,
+    std_devs,
+    uncertainty_containers,
+)
+
+__all__ = [
+    "DowncastError",
+    "DowncastWarning",
+    "NegativeStdDevError",
+    "ScalarDisplay",
+    "ScalarUncertainty",
+    "Uncertainty",
+    "UncertaintyArray",
+    "UncertaintyDtype",
+    "VectorDisplay",
+    "VectorUncertainty",
+    "display_format",
+    "exceptions",
+    "nominal_values",
+    "numpy",
+    "pandas",
+    "set_compare_error",
+    "set_display_rounding",
+    "set_downcast_error",
+    "std_devs",
+    "unc_array",
+    "unc_dtype",
+    "uncertainty",
+    "uncertainty_containers",
+    "util",
+]
diff --git a/auto_uncertainties/display_format.py b/auto_uncertainties/display_format.py
new file mode 100644
index 0000000..3757d14
--- /dev/null
+++ b/auto_uncertainties/display_format.py
@@ -0,0 +1,231 @@
+from __future__ import annotations
+
+import decimal
+import math
+
+from numpy.typing import NDArray
+
+ROUND_ON_DISPLAY = False
+
+__all__ = ["set_display_rounding", "VectorDisplay", "ScalarDisplay"]
+
+
+def set_display_rounding(val: bool):
+    """Set the rounding on display to PDG recommendations."""
+    global ROUND_ON_DISPLAY
+    ROUND_ON_DISPLAY = val
+
+
+class VectorDisplay:
+    default_format: str = ""
+    _nom: NDArray
+    _err: NDArray
+
+    def _repr_html_(self):
+        val_ = self._nom
+        err_ = self._err
+        header = "<table><tbody>"
+        footer = "</tbody></table>"
+        vformatted = []
+        eformatted = []
+        for v, e in zip(val_.ravel(), err_.ravel(), strict=False):
+            vformat, eformat = pdg_round(v, e, return_zero=True)
+            vformatted.append(vformat)
+            eformatted.append(eformat)
+        val = f"<tr><th>Magnitude</th><td>{', '.join(vformatted)}</td></tr>"
+        err = f"<tr><th>Error</th><td>{', '.join(eformatted)}</td></tr>"
+
+        return header + val + err + footer
+
+    def _repr_latex_(self):
+        val_ = self._nom
+        err_ = self._err
+        s = []
+        for v, e in zip(val_.ravel(), err_.ravel(), strict=False):
+            vformat, eformat = pdg_round(v, e, return_zero=True)
+            s.append(f"{vformat} \\pm {eformat}")
+        s = ", ".join(s) + "~"
+        header = "$"
+        footer = "$"
+        return header + s + footer
+
+    def __str__(self) -> str:
+        val_ = self._nom
+        err_ = self._err
+
+        s = []
+        for v, e in zip(val_.ravel(), err_.ravel(), strict=False):
+            vformat, eformat = pdg_round(v, e, return_zero=True)
+            s.append(f"{vformat} +/- {eformat}")
+        return "[" + ", ".join(s) + "]"
+
+    def __format__(self, fmt):
+        val_ = self._nom
+        err_ = self._err
+        s = []
+        for v, e in zip(val_.ravel(), err_.ravel(), strict=False):
+            vformat, eformat = pdg_round(v, e, format_spec=fmt, return_zero=True)
+            s.append(f"{vformat} +/- {eformat}")
+
+        return "[" + ", ".join(s) + "]"
+
+    def __repr__(self) -> str:
+        return str(self)
+
+
+class ScalarDisplay:
+    default_format: str = ""
+    _nom: float
+    _err: float
+
+    def _repr_html_(self):
+        val_ = self._nom
+        err_ = self._err
+        vformat, eformat = pdg_round(val_, err_)
+        if eformat == "":
+            return f"{vformat}"
+        else:
+            return f"{vformat} {chr(0x00B1)} {eformat}"
+
+    def _repr_latex_(self):
+        val_ = self._nom
+        err_ = self._err
+        vformat, eformat = pdg_round(val_, err_)
+        if eformat == "":
+            return f"{vformat}"
+        else:
+            return f"{vformat} \\pm {eformat}"
+
+    def __str__(self) -> str:
+        val_ = self._nom
+        err_ = self._err
+
+        vformat, eformat = pdg_round(val_, err_)
+        if eformat == "":
+            return f"{vformat}"
+        else:
+            return f"{vformat} +/- {eformat}"
+
+    def __format__(self, fmt):
+        val_ = self._nom
+        err_ = self._err
+
+        vformat, eformat = pdg_round(val_, err_)
+        if eformat == "":
+            return f"{vformat}"
+        else:
+            return f"{vformat} +/- {eformat}"
+
+    def __repr__(self) -> str:
+        return str(self)
+
+
+# From https://github.com/lmfit/uncertainties/blob/master/uncertainties/core.py
+def first_digit(value):
+    """
+    Return the first digit position of the given value, as an integer.
+
+    0 is the digit just before the decimal point. Digits to the right
+    of the decimal point have a negative position.
+
+    Return 0 for a null value.
+    """
+    try:
+        return int(math.floor(math.log10(abs(value))))
+    except ValueError:  # Case of value == 0
+        return 0
+
+
+# From https://github.com/lmfit/uncertainties/blob/master/uncertainties/core.py
+def PDG_precision(std_dev):
+    """
+    Return the number of significant digits to be used for the given
+    standard deviation, according to the rounding rules of the
+    Particle Data Group (2010)
+    (http://pdg.lbl.gov/2010/reviews/rpp2010-rev-rpp-intro.pdf).
+
+    Also returns the effective standard deviation to be used for
+    display.
+    """
+
+    exponent = first_digit(std_dev)
+
+    # The first three digits are what matters: we get them as an
+    # integer number in [100; 999).
+    #
+    # In order to prevent underflow or overflow when calculating
+    # 10**exponent, the exponent is slightly modified first and a
+    # factor to be applied after "removing" the new exponent is
+    # defined.
+    #
+    # Furthermore, 10**(-exponent) is not used because the exponent
+    # range for very small and very big floats is generally different.
+    if exponent >= 0:
+        # The -2 here means "take two additional digits":
+        (exponent, factor) = (exponent - 2, 1)
+    else:
+        (exponent, factor) = (exponent + 1, 1000)
+    digits = int(std_dev / 10.0**exponent * factor)  # int rounds towards zero
+
+    # Worked example: std_dev = 0.0237 gives exponent = -2 and digits = 237,
+    # which is <= 354, so two significant digits are kept; std_dev = 0.96
+    # gives digits = 960 > 949, so the displayed uncertainty rounds up to 1.0.
+
+    # Rules:
+    if digits <= 354:
+        return (2, std_dev)
+    elif digits <= 949:
+        return (1, std_dev)
+    else:
+        # The parentheses matter, for very small or very large
+        # std_dev:
+        return (2, 10.0**exponent * (1000 / factor))
+
+
+def pdg_round(
+    value, uncertainty, format_spec="g", *, return_zero: bool = False
+) -> tuple[str, str]:
+    """
+    Format a value with uncertainty according to PDG rounding rules.
+
+    Args:
+        value (float): The central value.
+        uncertainty (float): The uncertainty of the value.
+
+    Returns:
+        tuple[str, str]: The formatted value and uncertainty strings.
+    """
+    if ROUND_ON_DISPLAY:
+        if uncertainty is not None and uncertainty > 0:
+            _, pdg_unc = PDG_precision(uncertainty)
+            # Determine the order of magnitude of the uncertainty
+            order_of_magnitude = 10 ** (int(math.floor(math.log10(pdg_unc))) - 1)
+
+            # Round the uncertainty based on how many digits we want to keep
+            rounded_uncertainty = (
+                round(pdg_unc / order_of_magnitude) * order_of_magnitude
+            )
+            # Round the central value according to the rounded uncertainty
+            unc_implied_digits_to_keep = -int(
+                math.floor(math.log10(rounded_uncertainty))
+            )
+            if value != 0:
+                # Keep at least two digits for the central value, even if the uncertainty is much larger
+                digits = max(
+                    unc_implied_digits_to_keep,
+                    -int(math.floor(math.log10(abs(value)))) + 1,
+                )
+            else:
+                digits = unc_implied_digits_to_keep
+
+            # Use decimal to keep trailing zeros
+            rounded_value_dec = round(decimal.Decimal(value), digits)
+            rounded_unc_dec = round(
+                decimal.Decimal(rounded_uncertainty),
+                unc_implied_digits_to_keep + 1,
+            )
+            return (
+                f"{rounded_value_dec:{format_spec}}",
+                f"{rounded_unc_dec:{format_spec}}",
+            )
+
+        else:
+            return f"{value:{format_spec}}", "0" if return_zero else ""
+    else:
+        return f"{value:{format_spec}}", f"{uncertainty:{format_spec}}"
diff --git a/auto_uncertainties/exceptions.py b/auto_uncertainties/exceptions.py
new file mode 100644
index 0000000..4863ee5
--- /dev/null
+++ b/auto_uncertainties/exceptions.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+__all__ = ["NegativeStdDevError", "DowncastError", "DowncastWarning"]
+
+
+class NegativeStdDevError(Exception):
+    """An exception for when the standard deviation is negative"""
+
+
+class DowncastError(RuntimeError):
+    """An exception raised when an uncertainties array is downcast to a numpy array"""
+
+
+class DowncastWarning(RuntimeWarning):
+    """A warning emitted when an uncertainties array is downcast to a numpy array"""
diff --git a/auto_uncertainties/numpy/__init__.py b/auto_uncertainties/numpy/__init__.py
new file mode 100644
index 0000000..13345fb
--- /dev/null
+++ b/auto_uncertainties/numpy/__init__.py
@@ -0,0 +1,8 @@
+from __future__ import annotations
+
+import lazy_loader
+
+
+__getattr__, __dir__, __all__ = lazy_loader.attach_stub(__name__, __file__)
+
+__all__ = ["HANDLED_FUNCTIONS", "HANDLED_UFUNCS", "numpy_wrappers", "wrap_numpy"]
diff --git a/auto_uncertainties/numpy/__init__.pyi b/auto_uncertainties/numpy/__init__.pyi
new file mode 100644
index 0000000..6dff9c7
--- /dev/null
+++ b/auto_uncertainties/numpy/__init__.pyi
@@ -0,0 +1,9 @@
+from .
import numpy_wrappers + +from .numpy_wrappers import ( + HANDLED_FUNCTIONS, + HANDLED_UFUNCS, + wrap_numpy, +) + +__all__ = ["HANDLED_FUNCTIONS", "HANDLED_UFUNCS", "numpy_wrappers", "wrap_numpy"] diff --git a/auto_uncertainties/wrap_numpy.py b/auto_uncertainties/numpy/numpy_wrappers.py similarity index 75% rename from auto_uncertainties/wrap_numpy.py rename to auto_uncertainties/numpy/numpy_wrappers.py index 552c5d1..0cb0942 100644 --- a/auto_uncertainties/wrap_numpy.py +++ b/auto_uncertainties/numpy/numpy_wrappers.py @@ -1,25 +1,18 @@ -# -*- coding: utf-8 -*- # Based heavily on the implementation of pint's numpy array function wrapping from __future__ import annotations -try: - import numpy as np -except ImportError: - np = None -try: - import jax - import jax.numpy as jnp +import jax +import jax.numpy as jnp +import numpy as np -except ImportError: - jax = None - jnp = None - -from .util import has_length, is_iterable, ndarray_to_scalar +from auto_uncertainties.util import has_length, is_iterable, ndarray_to_scalar HANDLED_UFUNCS = {} HANDLED_FUNCTIONS = {} +__all__ = ["wrap_numpy", "HANDLED_UFUNCS", "HANDLED_FUNCTIONS"] + def _is_uncertainty(obj): """Test for _nom and _err attrs. @@ -75,7 +68,7 @@ def convert_arg(arg, attr: str = None): if attr != "_nom": return None else: - return [convert_arg(item, attr) for item in arg] + return np.array([convert_arg(item, attr) for item in arg]) else: if attr != "_nom": return None @@ -101,9 +94,7 @@ def classify_and_split_args_and_kwargs(*args, **kwargs): """ uncert_argnums = tuple( - idx - for idx, arg in enumerate(args) - if convert_arg(arg, "_nom") is not None + idx for idx, arg in enumerate(args) if convert_arg(arg, "_nom") is not None ) uncert_arg_nom = tuple(convert_arg(arg, "_nom") for arg in args) uncert_arg_err = [] @@ -114,9 +105,7 @@ def classify_and_split_args_and_kwargs(*args, **kwargs): else: uncert_arg_err.append(jnp.zeros_like(uncert_arg_nom[aidx])) uncert_arg_err = tuple(uncert_arg_err) - uncert_kwarg_nom = { - key: convert_arg(arg, "_nom") for key, arg in kwargs.items() - } + uncert_kwarg_nom = {key: convert_arg(arg, "_nom") for key, arg in kwargs.items()} return uncert_argnums, uncert_arg_nom, uncert_arg_err, uncert_kwarg_nom @@ -132,7 +121,8 @@ def decorator(func): elif func_type == "ufunc": HANDLED_UFUNCS[numpy_func_string] = func else: - raise ValueError("Invalid func_type {}".format(func_type)) + msg = f"Invalid func_type {func_type}" + raise ValueError(msg) return func return decorator @@ -144,7 +134,7 @@ def get_func_from_package(func_str, namespace): func = getattr(namespace, func_str_split[0], None) # If the function is not available, do not attempt to implement it if func is None: - return + return None for func_str_piece in func_str_split[1:]: func = getattr(func, func_str_piece) @@ -154,23 +144,22 @@ def get_func_from_package(func_str, namespace): def elementwise_grad(g): def wrapped(*args, **kwargs): y, g_vjp = jax.vjp(lambda *a: g(*a, **kwargs), *args) - x_bar = g_vjp(np.ones_like(y)) - return x_bar + return g_vjp(np.ones_like(y)) return wrapped def get_mappable_dims(*args): # Check that all the args have the same dimension - assert all([a.ndim == args[0].ndim for a in args]) + assert all(a.ndim == args[0].ndim for a in args) # Check that the size of each dimension is either the same as the maximum, or 1 mappable = [None for a in args] max_dim_sizes = [] - for i, dim in enumerate(range(args[0].ndim)): + for _, dim in enumerate(range(args[0].ndim)): sz = [a.shape[dim] for a in args] max_sz = max(sz) 
max_dim_sizes.append(max_sz) - assert all([s == max_sz or s == 1 for s in sz]) + assert all(s == max_sz or s == 1 for s in sz) for i, a in enumerate(args): map_axes = [] for j, dim in enumerate(range(args[0].ndim)): @@ -190,6 +179,7 @@ def implement_func( grad_argnum_override=None, selection_operator=None, output_rank=0, + custom_jax_dispatch=None, ): """Add default-behavior NumPy function/ufunc to the handled list. @@ -213,7 +203,10 @@ def implement_func( if jnp is None: return - func = get_func_from_package(func_str, jnp) + if custom_jax_dispatch is not None: + func = custom_jax_dispatch + else: + func = get_func_from_package(func_str, jnp) # Skip the JAX overhead if you dont need gradient info func_np = get_func_from_package(func_str, np) @@ -235,31 +228,32 @@ def implementation(*args, **kwargs): value = func_np(*bcast_args_nom, **uncert_kwarg_nom) grads = elementwise_grad(func)(*bcast_args_nom, **uncert_kwarg_nom) error_dot_grad_sqr = [ - (e * g) ** 2 for e, g in zip(bcast_args_err, grads) + (e * g) ** 2 for e, g in zip(bcast_args_err, grads, strict=False) ] error = np.sum(error_dot_grad_sqr, axis=0) ** 0.5 return Uncertainty(value, error) # return uncert_instance.__class__(val, err) - elif implement_mode == "same_shape_bool": - return func_np(*uncert_arg_nom, **uncert_kwarg_nom) - elif implement_mode == "nograd": - return func_np(*uncert_arg_nom, **uncert_kwarg_nom) - elif implement_mode == "selection_operator": + elif ( + implement_mode == "same_shape_bool" + or implement_mode == "nograd" + or implement_mode == "selection_operator" + ): return func_np(*uncert_arg_nom, **uncert_kwarg_nom) elif implement_mode == "selection": sel_func_np = get_func_from_package(selection_operator, np) axis = uncert_kwarg_nom.pop("axis", None) if axis is None: idx = sel_func_np(*uncert_arg_nom, **uncert_kwarg_nom) - return np.ravel(uncert_arg_nom[0])[idx] + val = np.ravel(uncert_arg_nom[0])[idx] + err = np.ravel(uncert_arg_err[0])[idx] else: idxs = np.expand_dims( - sel_func_np( - *uncert_arg_nom, axis=axis, **uncert_kwarg_nom - ), + sel_func_np(*uncert_arg_nom, axis=axis, **uncert_kwarg_nom), axis=axis, ) - return np.take_along_axis(uncert_arg_nom[0], idxs, axis=axis) + val = np.take_along_axis(uncert_arg_nom[0], idxs, axis=axis) + err = np.take_along_axis(uncert_arg_err[0], idxs, axis=axis) + return Uncertainty(val, err) elif implement_mode in ["apply_to_both"]: val = func_np(*uncert_arg_nom, **uncert_kwarg_nom) err = np.abs(func_np(*uncert_arg_err, **uncert_kwarg_nom)) @@ -276,15 +270,19 @@ def implementation(*args, **kwargs): axis = tuple(axis) error_dot_grad_sqr = [ np.sum((e * g) ** 2, axis=axis) - for e, g in zip(bcast_args_err, grads) + for e, g in zip(bcast_args_err, grads, strict=False) ] else: error_dot_grad_sqr = [ - np.sum((e * g) ** 2) for e, g in zip(bcast_args_err, grads) + np.sum((e * g) ** 2) + for e, g in zip(bcast_args_err, grads, strict=False) ] err = np.sum(error_dot_grad_sqr, axis=0) ** 0.5 return Uncertainty(ndarray_to_scalar(val), ndarray_to_scalar(err)) + else: + msg = f"Invalid implement_mode {implement_mode}" + raise ValueError(msg) # Returns a bool array of the same shape (i.e. 
elementwise conditionals)
@@ -365,9 +363,7 @@ def implementation(*args, **kwargs):
     "arctan2",
     "hypot",
 ]
-bcast_same_shape_ufuncs = (
-    binary_bcast_same_shape_ufuncs + unary_bcast_same_shape_ufuncs
-)
+bcast_same_shape_ufuncs = binary_bcast_same_shape_ufuncs + unary_bcast_same_shape_ufuncs
 for ufunc in bcast_same_shape_ufuncs:
     implement_func("ufunc", ufunc, implement_mode="same_shape")

@@ -377,6 +373,31 @@ def implementation(*args, **kwargs):
 for ufunc in bcast_selection_operator_funcs:
     implement_func("function", ufunc, implement_mode="selection_operator")

+
+# Reductions like max/min/median can only be propagated correctly via Monte-Carlo estimation
+def _monte_carlo_reduction(a, axis=None, **kwargs):
+    from auto_uncertainties import Uncertainty
+
+    N = 10000
+    samples = np.random.normal(size=(*a._nom.shape, N))  # noqa: NPY002
+    samples = a._nom[..., None] + samples * a._err[..., None]
+
+    if axis is None:
+        axis = tuple(range(a._nom.ndim))
+
+    operation = getattr(np, kwargs.pop("op"))
+    result = operation(samples, axis=axis, **kwargs)
+
+    mean_value = np.mean(result, axis=-1)
+    std_value = np.std(result, axis=-1)
+
+    return Uncertainty(mean_value, std_value)
+
+
+# apply_via_monte_carlo = ["max", "min", "amax", "amin", "median"]
+# for ufunc in apply_via_monte_carlo:
+#     implements(ufunc, "function")(partial(_monte_carlo_reduction, op=ufunc))
+
 # Selects a sub-section of or reshapes the Uncertainty array by some criteria
 bcast_selection_funcs = {
     "max": "argmax",
     "min": "argmin",
@@ -411,7 +432,6 @@ def implementation(*args, **kwargs):
     "negative",
     "positive",
     "fabs",
-    "round",
     "ceil",
     "floor",
     "rint",
@@ -422,13 +442,78 @@ def implementation(*args, **kwargs):
     implement_func("function", ufunc, implement_mode="apply_to_both")

 # Applies a reduction
-implement_func("function", "trapz", implement_mode="reduction_binary")
+implement_func(
+    "function",
+    "trapz",
+    implement_mode="reduction_binary",
+    custom_jax_dispatch=jax.scipy.integrate.trapezoid,
+)

 bcast_reduction_unary = ["std", "sum", "var", "mean", "ptp", "median"]
 for ufunc in bcast_reduction_unary:
     implement_func("function", ufunc, implement_mode="reduction_unary")

+
+def _power(x1, x2, *args, **kwargs):
+    """x1 ** x2"""
+    from auto_uncertainties import Uncertainty
+
+    if _is_uncertainty(x1):
+        sA = x1._err
+        A = x1._nom
+    else:
+        sA = 0
+        A = x1
+    if _is_uncertainty(x2):
+        sB = x2._err
+        B = x2._nom
+    else:
+        sB = 0
+        B = x2
+
+    new_mag = np.power(A, B, *args, **kwargs)
+
+    new_err = np.abs(new_mag) * np.sqrt(
+        (B / A * sA) ** 2 + (np.log(np.abs(A)) * sB) ** 2
+    )
+
+    return Uncertainty(new_mag, new_err)
+
+
+implements("power", "function")(_power)
+implements("power", "ufunc")(_power)
+
+
+def _searchsort(x1, x2, *args, **kwargs):
+    """Find the indices at which elements of x2 should be inserted into x1 to keep it sorted."""
+
+    A = x1._nom if _is_uncertainty(x1) else x1
+    B = x2._nom if _is_uncertainty(x2) else x2
+
+    return np.searchsorted(A, B, *args, **kwargs)
+
+
+implements("searchsorted", "function")(_searchsort)
+
+
+@implements("unique", "function")
+def _unique(
+    ar,
+    return_index=False,
+    return_inverse=False,
+    return_counts=False,
+    axis=None,
+    *,
+    equal_nan=True,
+):
+    # Note: the return_* flags, axis and equal_nan are accepted for numpy API
+    # compatibility but currently ignored; uniqueness is decided on the
+    # nominal values alone.
+    ret = np.unique(
+        ar._nom,
+        return_index=True,
+    )
+    idx = ret[1]
+    return ar.__class__(ar._nom[idx], ar._err[idx])
+
+
 @implements("round", "function")
 def _round(a, *args, **kwargs):
     val = np.round(a._nom, *args, **kwargs).squeeze()
@@ -436,6 +521,12 @@ def _round(a, *args, **kwargs):
     return a.__class__(val, err)


+@implements("sort", "function")
+def _sort(a, *args, **kwargs):
+    ind = np.argsort(a._nom, *args, **kwargs)
+    return np.take_along_axis(a, ind, *args, **kwargs).squeeze()
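+
+# Hand-computed sanity check of the first-order rule in _power above (derived
+# from the formula, not captured library output): with A, sA = 10.0, 0.3 and an
+# exact exponent B, sB = 2.0, 0.0, new_mag = 10.0**2.0 = 100.0 and
+# new_err = |100.0| * sqrt((2.0/10.0 * 0.3)**2 + 0) = 6.0, i.e. squaring
+# Uncertainty(10.0, 0.3) yields 100.0 +/- 6.0.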
+
+
 @implements("take_along_axis", "function")
 def _take_along_axis(a, *args, **kwargs):
     val = np.take_along_axis(a._nom, *args, **kwargs).squeeze()
@@ -471,12 +562,10 @@ def wrap_numpy(func_type, func, args, kwargs):
     elif func_type == "ufunc":
         handled = HANDLED_UFUNCS
         # ufuncs do not have func.__module__
-        if isinstance(func, str):
-            name = func
-        else:
-            name = func.__name__
+        name = func if isinstance(func, str) else func.__name__
     else:
-        raise ValueError("Invalid func_type {}".format(func_type))
+        msg = f"Invalid func_type {func_type}"
+        raise ValueError(msg)

     if name not in handled:
         return NotImplemented
diff --git a/auto_uncertainties/pandas/__init__.py b/auto_uncertainties/pandas/__init__.py
new file mode 100644
index 0000000..ef5bbab
--- /dev/null
+++ b/auto_uncertainties/pandas/__init__.py
@@ -0,0 +1,8 @@
+from __future__ import annotations
+
+import lazy_loader
+
+
+__getattr__, __dir__, __all__ = lazy_loader.attach_stub(__name__, __file__)
+
+__all__ = ["UncertaintyArray", "UncertaintyDtype", "unc_array", "unc_dtype"]
diff --git a/auto_uncertainties/pandas/__init__.pyi b/auto_uncertainties/pandas/__init__.pyi
new file mode 100644
index 0000000..675b8a8
--- /dev/null
+++ b/auto_uncertainties/pandas/__init__.pyi
@@ -0,0 +1,11 @@
+from . import unc_array
+from . import unc_dtype
+
+from .unc_array import (
+    UncertaintyArray,
+)
+from .unc_dtype import (
+    UncertaintyDtype,
+)
+
+__all__ = ["UncertaintyArray", "UncertaintyDtype", "unc_array", "unc_dtype"]
diff --git a/auto_uncertainties/pandas/unc_array.py b/auto_uncertainties/pandas/unc_array.py
new file mode 100644
index 0000000..2f2fa88
--- /dev/null
+++ b/auto_uncertainties/pandas/unc_array.py
@@ -0,0 +1,542 @@
+"""A pandas extension array for values with uncertainties, adapted from
+pandas' Decimal extension-array test suite.
+
+https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.api.extensions.ExtensionDtype.html#pandas.api.extensions.ExtensionDtype
+https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.api.extensions.ExtensionArray.html#pandas.api.extensions.ExtensionArray
+
+https://github.com/pandas-dev/pandas/tree/e246c3b05924ac1fe083565a765ce847fcad3d91/pandas/tests/extension/decimal
+
+"""
+
+from __future__ import annotations
+
+from copy import deepcopy
+import sys
+from typing import TYPE_CHECKING, Self, cast
+
+import numpy as np
+import pandas as pd
+from pandas.api.types import is_list_like
+from pandas.compat import set_function_name
+from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin
+from pandas.core.dtypes.common import is_integer
+from pandas.core.indexers import check_array_indexer
+
+from auto_uncertainties.uncertainty import (
+    ScalarUncertainty,
+    Uncertainty,
+    VectorUncertainty,
+)
+
+from .unc_dtype import UncertaintyDtype
+
+if TYPE_CHECKING:
+    pass
+
+__all__ = ["UncertaintyArray"]
+
+
+class UncertaintyArray(ExtensionArray, ExtensionScalarOpsMixin):
+    """A custom 1-D array type holding value/error pairs."""
+
+    __array_priority__ = VectorUncertainty.__array_priority__
+    __pandas_priority__ = 1999
+
+    ####################################################
+    #### Construction ##################################
+    ####################################################
+    @classmethod
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
+        """Construct a new ExtensionArray from a sequence of scalars."""
+        return cls(scalars, dtype=dtype)
+
+    @classmethod
+    def _from_sequence_of_strings(
+        cls,
+        strings,
+        *,
+        dtype: UncertaintyDtype | None = None,
+        copy: bool = False,
+    ):
+        vals = []
+        for s in strings:
+            if not isinstance(s, str):
+                msg = "not all strings are of dtype str"
+                raise TypeError(msg)
+            vals.append(Uncertainty.from_string(s))
+
+        return cls(vals, dtype=dtype, copy=copy)
+
+    @classmethod
+    def _from_factorized(cls, values, original):
+        """Reconstruct an ExtensionArray after factorization."""
+        return cls(values)
+
+    def __init__(
+        self,
+        values,
+        errors=None,
+        dtype=None,
+        copy=False,
+    ):
+        if errors is not None:
+            assert len(values) == len(
+                errors
+            ), "values and errors must have the same length"
+        else:
+            # Passed a UncertaintyArray
+            if isinstance(values, (UncertaintyArray)):
+                errors = values._data._err
+                values = values._data._nom
+            # Passed an Uncertainty
+            elif isinstance(values, (Uncertainty)):
+                errors = values._err
+                values = values._nom
+            # Passed some kind of list-like
+            elif is_list_like(values):
+                # If it's got anything in it,
+                # the only valid kinds of objects are
+                # 1. All a seq of UArrays
+                # Or any combination of
+                # 2. Uncertainties
+                # 3. tuples of value/error pairs
+                # 4. floats (in which case error will be zero)
+
+                if len(values) > 0:
+                    # If it's a sequence of UArrays
+                    if all(isinstance(x, UncertaintyArray) for x in values):
+                        errors = np.concatenate([x._data._err for x in values])
+                        values = np.concatenate([x._data._nom for x in values])
+                    else:
+                        vals = []
+                        errs = []
+                        for x in values:
+                            if isinstance(x, VectorUncertainty):
+                                errs += x._err.tolist()
+                                vals += x._nom.tolist()
+                            elif isinstance(x, ScalarUncertainty):
+                                errs.append(x._err)
+                                vals.append(x._nom)
+                            elif hasattr(x, "__len__") and len(x) == 2:
+                                errs.append(x[1])
+                                vals.append(x[0])
+                            elif isinstance(x, float):
+                                errs.append(0.0)
+                                vals.append(x)
+                            else:
+                                msg = f"values must be only UncertaintyArray, Uncertainty, (float,float), float or sequences of these. Instead got {type(x)}"
+                                raise ValueError(msg)
+                        values = vals
+                        errors = errs
+                # Empty sequence
+                else:
+                    errors = np.array([])
+                    values = np.array([])
+            else:
+                msg = f"values must be only UncertaintyArray, Uncertainty or a list of them. Instead got {type(values)}"
+                raise ValueError(msg)
+        if copy:
+            values = deepcopy(values)
+            errors = deepcopy(errors)
+        values = np.atleast_1d(values)
+        errors = np.atleast_1d(errors)
+
+        if dtype is None:
+            dtype = UncertaintyDtype(values.dtype)
+
+        self._dtype = dtype
+
+        self._data = VectorUncertainty(values, errors)
+        self._items = self.data = self._data
+
+        assert self._data.ndim == 1, "Data must be 1-dimensional"
+
+    #############################################
+    ############# Attributes ####################
+    #############################################
+    @property
+    def value(self):
+        return self._data._nom
+
+    @property
+    def error(self):
+        return self._data._err
+
+    @property
+    def nbytes(self):
+        """The byte size of the data."""
+        return sys.getsizeof(self._data._nom[0]) * len(self) * 2
+
+    @property
+    def dtype(self):
+        """An instance of 'ExtensionDtype'."""
+        return self._dtype
+
+    @property
+    def array(self):
+        return self._data
+
+    def copy(self):
+        """
+        Return a copy of the array.
+
+        Returns
+        -------
+        ExtensionArray
+
+        Examples
+        --------
+        >>> arr = pd.array([1, 2, 3])
+        >>> arr2 = arr.copy()
+        >>> arr[0] = 2
+        >>> arr2
+        <IntegerArray>
+        [1, 2, 3]
+        Length: 3, dtype: Int64
+        """
+
+        return self.__class__(self._data, dtype=self.dtype, copy=True)
+
+    ##########################
+    ###### NaN handling ######
+    ##########################
+    def __contains__(self, item: Uncertainty | float) -> bool | np.bool_:
+        if isinstance(item, float) and pd.isna(item):
+            return cast(np.ndarray, self.isna()).any()
+        elif not isinstance(item, Uncertainty):
+            return False
+        else:
+            return super().__contains__(item)
+
+    ##########################
+    ######## Numpy ###########
+    ##########################
+
+    _HANDLED_TYPES = (np.ndarray, Uncertainty, float, int)
+
+    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
+        #
+        if not all(
+            isinstance(t, (*self._HANDLED_TYPES, UncertaintyArray)) for t in inputs
+        ):
+            return NotImplemented
+        # Extract the underlying Uncertainty from all UArray objects
+        inputs = tuple(
+            x._data if isinstance(x, UncertaintyArray) else x for x in inputs
+        )
+        # Perform the operation
+        result = getattr(ufunc, method)(*inputs, **kwargs)
+        # Deal with boolean ops, otherwise return a new UArray
+        if all(isinstance(x, bool | np.bool_) for x in result):
+            retval = result[0] if len(result) == 1 else np.asarray(result, dtype=bool)
+        elif ufunc.nout > 1:
+            retval = tuple(self.__class__(x) for x in result)
+        else:
+            retval = self.__class__(result)
+
+        return retval
+
+    def __pos__(self):
+        return self.__class__(+self._data)
+
+    def __neg__(self):
+        return self.__class__(-self._data)
+
+    def __abs__(self):
+        return self.__class__(abs(self._data))
+
+    def __invert__(self):
+        raise TypeError
+
+    ##############################
+    #### List-like ###############
+    ##############################
+
+    def __getitem__(self, item):
+        """Select a subset of self."""
+        if is_integer(item):
+            return self._data[item]
+
+        key = check_array_indexer(self, item)
+        return UncertaintyArray(self._data[key])
+
+    def __setitem__(self, key, value):
+        """Set the value of a subset of self."""
+        if isinstance(value, UncertaintyArray):
+            v = value._data
+        elif is_list_like(value) and len(value) > 0:
+            if all(isinstance(x, UncertaintyArray) for x in value):
+                v = UncertaintyArray._from_sequence(value)._data
+            else:
+                v = Uncertainty.from_sequence(value)
+            if len(v) == 1:
+                v = v[0]
+        elif (is_list_like(value) and len(value) == 0) or (not value):
+            return
+        elif isinstance(value, Uncertainty):
+            v = value
+        elif not np.any(np.isfinite(value)):
+            v = self.dtype.na_value
+        else:
+            raise ValueError
+
+        key = check_array_indexer(self, key)
+        self._data[key] = v
+
+    def __len__(self) -> int:
+        """Length of this array."""
+        if np.ndim(self._data) == 0:
+            return 0
+        else:
+            return len(self._data)
+
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        """Concatenate multiple arrays."""
+        return cls(
+            np.concatenate([x._data for x in to_concat]),
+        )
+
+    def take(
+        self,
+        indexer,
+        allow_fill=False,
+        fill_value: (float | tuple[float, float] | ScalarUncertainty | None) = None,
+    ):
+        """Take elements from an array.
+ + Relies on the take method defined in pandas: + https://github.com/pandas-dev/pandas/blob/e246c3b05924ac1fe083565a765ce847fcad3d91/pandas/core/algorithms.py#L1483 + """ + from pandas.api.extensions import take + + if allow_fill: + if fill_value is None: + fill_value = self.dtype.na_value + fval = fill_value + ferr = 0 + + elif isinstance(fill_value, tuple): + fval = fill_value[0] + ferr = fill_value[1] + elif isinstance(fill_value, ScalarUncertainty): + fval = fill_value.value + ferr = fill_value.error + else: + fval = fill_value + ferr = 0 + else: + fval = ferr = None + v = take( + self._data._nom, + indexer, + fill_value=fval, + allow_fill=allow_fill, + ) + e = take( + self._data._err, + indexer, + fill_value=ferr, + allow_fill=allow_fill, + ) + return self._from_sequence(list(zip(v, e, strict=False))) + + def __eq__(self, other: pd.DataFrame | pd.Series | pd.Index | UncertaintyArray): + """ + Return for `self == other` (element-wise equality). + """ + # Implementer note: this should return a boolean numpy ndarray or + # a boolean ExtensionArray. + # When `other` is one of Series, Index, or DataFrame, this method should + # return NotImplemented (to ensure that those objects are responsible for + # first unpacking the arrays, and then dispatch the operation to the + # underlying arrays) + if isinstance(other, pd.DataFrame | pd.Series | pd.Index): + return NotImplemented + + return self._data == other._data + + def isna(self): + """A 1-D array indicating if each value is missing.""" + return np.isnan(self._data._nom) + + def _formatter(self, boxed=False): + def formatter(x): + return f"{x}" + + return formatter + + @property + def _na_value(self): + return self.dtype.na_value + + def dropna(self): + return self[~self.isna()] + + def unique(self): + return self.__class__(np.unique(self._data)) + + def searchsorted(self, value, side="left", sorter=None): + return np.searchsorted(self._data, value, side=side, sorter=sorter) + + def _values_for_argsort(self): + """ + Return values for sorting. + Returns + ------- + ndarray + The transformed values should maintain the ordering between values + within the array. + See Also + -------- + ExtensionArray.argsort : Return the indices that would sort this array. + """ + # Note: this is used in `ExtensionArray.argsort`. 
+ return self._data._nom + + _supported_reductions = ( + "min", + "max", + "sum", + "mean", + "median", + "prod", + "std", + "var", + "sem", + ) + + def _reduce( + self, + name: str, + *, + skipna: bool = True, + keepdims: bool = False, + **kwargs, + ): + functions = { + "min": np.min, + "max": np.max, + "sum": np.sum, + "mean": np.mean, + "median": np.median, + "prod": np.prod, + "std": lambda x: np.std(x, ddof=1), + "var": lambda x: np.var(x, ddof=1), + "sem": lambda x: np.std(x, ddof=0), + } + if name not in functions: + msg = f"cannot perform {name} with type {self.dtype}" + raise TypeError(msg) + quantity = self.dropna().array if skipna else self.array + result = cast(Uncertainty, functions[name](quantity)) + + if keepdims: + return self.__class__(result) + else: + return result + + def _cmp_method(self, other, op): + # For use with OpsMixin + def convert_values(param): + if isinstance(param, ExtensionArray): + ovalues = param + else: + # Assume it's an object + ovalues = [param] * len(self) + return ovalues + + lvalues = self + rvalues = convert_values(other) + + # If the operator is not defined for the underlying objects, + # a TypeError should be raised + res = [op(a, b) for (a, b) in zip(lvalues, rvalues, strict=False)] + + return np.asarray(res, dtype=bool) + + def value_counts(self, dropna: bool = True): + from pandas.core.algorithms import ( + value_counts_internal as value_counts, + ) + + return value_counts(self._data._nom, dropna=dropna) + + @classmethod + def _create_method(cls, op, coerce_to_dtype: bool = True, result_dtype=None): + """ + A class method that returns a method that will correspond to an + operator for an ExtensionArray subclass, by dispatching to the + relevant operator defined on the individual elements of the + ExtensionArray. + + Parameters + ---------- + op : function + An operator that takes arguments op(a, b) + coerce_to_dtype : bool, default True + boolean indicating whether to attempt to convert + the result to the underlying ExtensionArray dtype. + If it's not possible to create a new ExtensionArray with the + values, an ndarray is returned instead. + + Returns + ------- + Callable[[Any, Any], Union[ndarray, ExtensionArray]] + A method that can be bound to a class. When used, the method + receives the two arguments, one of which is the instance of + this class, and should return an ExtensionArray or an ndarray. + + Returning an ndarray may be necessary when the result of the + `op` cannot be stored in the ExtensionArray. The dtype of the + ndarray uses NumPy's normal inference rules. 
+
+        Examples
+        --------
+        Given an ExtensionArray subclass called MyExtensionArray, use
+
+            __add__ = cls._create_method(operator.add)
+
+        in the class definition of MyExtensionArray to create the operator
+        for addition, that will be based on the operator implementation
+        of the underlying elements of the ExtensionArray
+        """
+
+        def _binop(self: Self, other):
+            if isinstance(other, pd.DataFrame | pd.Series | pd.Index):
+                # rely on pandas to unbox and dispatch to us
+                return NotImplemented
+
+            def convert_values(param):
+                if isinstance(param, UncertaintyArray):
+                    return param
+                elif is_list_like(param):
+                    ovalues = UncertaintyArray._from_sequence(param)
+                else:
+                    ovalues = param
+                return ovalues
+
+            lvalues = self._data
+            rvalues = convert_values(other)
+
+            real_op = op
+            # If the operator is not defined for the underlying objects,
+            # a TypeError should be raised
+            if op.__name__ in ["divmod", "rdivmod", "__invert__"]:
+                raise TypeError
+
+            if isinstance(rvalues, UncertaintyArray):
+                res = real_op(lvalues, rvalues._data)
+            else:
+                res = real_op(lvalues, rvalues)
+
+            if all(isinstance(x, bool | np.bool_) for x in res):
+                return res
+
+            return UncertaintyArray._from_sequence(res)
+
+        op_name = f"__{op.__name__}__"
+        return set_function_name(_binop, op_name, cls)
+
+
+UncertaintyArray._add_arithmetic_ops()
+UncertaintyArray._add_comparison_ops()
diff --git a/auto_uncertainties/pandas/unc_dtype.py b/auto_uncertainties/pandas/unc_dtype.py
new file mode 100644
index 0000000..106d1c0
--- /dev/null
+++ b/auto_uncertainties/pandas/unc_dtype.py
@@ -0,0 +1,145 @@
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING
+
+import numpy as np
+from pandas.api.extensions import register_extension_dtype
+from pandas.core.dtypes.base import ExtensionDtype
+
+from auto_uncertainties.uncertainty import ScalarUncertainty, Uncertainty
+
+if TYPE_CHECKING:
+    from pandas._typing import type_t
+
+    from .unc_array import UncertaintyArray
+
+__all__ = ["UncertaintyDtype"]
+
+
+@register_extension_dtype
+class UncertaintyDtype(ExtensionDtype):
+    type = Uncertainty
+    name = "Uncertainty"
+    _match = re.compile(r"^[Uu]ncertainty(\[([A-Za-z0-9]+)\])?$")
+    _metadata = {}  # noqa: RUF012
+
+    def __init__(self, dtype: np.dtype | str):
+        self.value_dtype = np.dtype(dtype).name
+
+    @property
+    def na_value(self):
+        return ScalarUncertainty(np.nan, 0)
+
+    def __repr__(self) -> str:
+        return f"Uncertainty[{self.value_dtype}]"
+
+    @classmethod
+    def construct_array_type(cls) -> type_t[UncertaintyArray]:
+        """
+        Return the array type associated with this dtype.
+
+        Returns
+        -------
+        type
+        """
+        from .unc_array import UncertaintyArray
+
+        return UncertaintyArray
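+
+    # Strings accepted by _match above (and hence by construct_from_string below)
+    # include "Uncertainty", "uncertainty" and "Uncertainty[float64]"; when the
+    # optional [dtype] group is absent, float64 is assumed.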
+
+    @classmethod
+    def construct_from_string(cls, string: str):
+        r"""
+        Construct this type from a string.
+
+        This is useful mainly for data types that accept parameters.
+        For example, a period dtype accepts a frequency parameter that
+        can be set as ``period[H]`` (where H means hourly frequency).
+
+        By default, in the abstract class, just the name of the type is
+        expected. But subclasses can overwrite this method to accept
+        parameters.
+
+        Parameters
+        ----------
+        string : str
+            The name of the type, for example ``category``.
+
+        Returns
+        -------
+        ExtensionDtype
+            Instance of the dtype.
+
+        Raises
+        ------
+        TypeError
+            If a class cannot be constructed from this 'string'.
+
+        Examples
+        --------
+        For extension dtypes with arguments the following may be an
+        adequate implementation.
+
+        >>> import re
+        >>> @classmethod
+        ... def construct_from_string(cls, string):
+        ...     pattern = re.compile(r"^my_type\[(?P<arg_name>.+)\]$")
+        ...     match = pattern.match(string)
+        ...     if match:
+        ...         return cls(**match.groupdict())
+        ...     else:
+        ...         raise TypeError(
+        ...             f"Cannot construct a '{cls.__name__}' from '{string}'"
+        ...         )
+        """
+        if not isinstance(string, str):
+            msg = f"'construct_from_string' expects a string, got {type(string)}"
+            raise TypeError(msg)
+        # error: Non-overlapping equality check (left operand type: "str", right
+        # operand type: "Callable[[ExtensionDtype], str]")  [comparison-overlap]
+        assert isinstance(cls.name, str), (cls, type(cls.name))
+
+        match = cls._match.match(string)
+        if match is None:
+            msg = f"Cannot construct a '{UncertaintyDtype.__name__}' from '{string}'"
+            raise TypeError(msg)
+        if match.group(1) is None:
+            return cls("float64")
+        # group(2) is the bare dtype name inside the brackets
+        return cls(match.group(2))
+
+    @property
+    def _is_numeric(self) -> bool:
+        return True
+
+    def __eq__(self, other) -> bool:
+        """
+        Check whether 'other' is equal to self.
+
+        By default, 'other' is considered equal if either
+
+        * it's a string matching 'self.name'.
+        * it's an instance of this type and all of the attributes
+          in ``self._metadata`` are equal between `self` and `other`.
+
+        Parameters
+        ----------
+        other : Any
+
+        Returns
+        -------
+        bool
+        """
+        if isinstance(other, str):
+            try:
+                other = self.construct_from_string(other)
+            except TypeError:
+                return False
+        if isinstance(other, type(self)):
+            return all(
+                getattr(self, attr) == getattr(other, attr) for attr in self._metadata
+            )
+        return False
+
+    def __hash__(self):
+        # make myself hashable
+        return hash(str(self))
diff --git a/auto_uncertainties/pandas_compat.py b/auto_uncertainties/pandas_compat.py
deleted file mode 100644
index 2fd3731..0000000
--- a/auto_uncertainties/pandas_compat.py
+++ /dev/null
@@ -1,918 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import annotations
-
-import copy
-import re
-from typing import Any, Sequence, Union
-
-import numpy as np
-import pandas as pd
-from pandas import DataFrame, Series
-from pandas.api.extensions import (
-    ExtensionArray,
-    ExtensionDtype,
-    register_dataframe_accessor,
-    register_extension_dtype,
-)
-from pandas.api.types import (
-    is_integer,
-    is_list_like,
-    is_object_dtype,
-    is_string_dtype,
-)
-from pandas.compat import set_function_name
-from pandas.core.arrays.base import ExtensionOpsMixin
-from pandas.core.indexers import check_array_indexer
-
-from . import nominal_values, std_devs, Uncertainty
-
-
-class UncertaintyType(ExtensionDtype):
-    """
-    An Uncertainty duck-typed class, suitable for holding an uncertainty (i.e. value and error pair) dtype. Closely follows the implementation in pint-pandas.
- """ - - type = Uncertainty - _metadata = ("value_dtype",) - _match = re.compile(r"[U|u]ncertainty\[([a-zA-Z0-9]+)\]") - _cache = {} - value_dtype: Any - - @property - def _is_numeric(self): - # type: () -> bool - return True - - def __new__(cls, value_dtype): - """ - Parameters - ---------- - units : Pint units or string - """ - - if isinstance(value_dtype, UncertaintyType): - return value_dtype - - elif value_dtype is None: - # empty constructor for pickle compat - return object.__new__(cls) - - else: - u = object.__new__(cls) - u.value_dtype = np.dtype(value_dtype) - - return u - - @classmethod - def construct_from_string(cls, string): - """ - Strict construction from a string, raise a TypeError if not - possible - """ - if not isinstance(string, str): - raise TypeError( - f"'construct_from_string' expects a string, got {type(string)}" - ) - try: - dtype = np.dtype(str) - return cls(value_dtype=dtype) - except Exception: - ... - - if cls._match.match(string): - dtype = cls._match.match(string).group(1) - return cls(value_dtype=dtype) - - if isinstance(string, str) and ( - string.startswith("uncertainty[") or string.startswith("Pint[") - ): - # do not parse string like U as pint[U] - # avoid tuple to be regarded as unit - try: - return cls(units=string) - except ValueError: - pass - raise TypeError( - f"Cannot construct a 'UncertaintyType' from '{string}'" - ) - - @property - def name(self): - return f"Uncertainty[{self.value_dtype}]" - - @property - def na_value(self): - return Uncertainty(np.nan, np.nan) - - def __hash__(self): - # make myself hashable - return hash(str(self)) - - def __eq__(self, other): - try: - other = UncertaintyType(other) - except ValueError: - return False - return self.value_dtype == other.value_dtype - - @classmethod - def is_dtype(cls, dtype): - """ - Return a boolean if we if the passed type is an actual dtype that we - can match (via string or type) - """ - if isinstance(dtype, str): - return cls._match.match(dtype) - return super(UncertaintyType, cls).is_dtype(dtype) - - @classmethod - def construct_array_type(cls): - return UncertaintyArray - - def __repr__(self): - """ - Return a string representation for this object. - - Invoked by unicode(df) in py2 only. Yields a Unicode String in both - py2/py3. - """ - - return self.name - - -dtypemap = { - int: pd.Int64Dtype(), - np.int64: pd.Int64Dtype(), - np.int32: pd.Int32Dtype(), - np.int16: pd.Int16Dtype(), - np.int8: pd.Int8Dtype(), - # np.float128: pd.Float128Dtype(), - float: pd.Float64Dtype(), - np.float64: pd.Float64Dtype(), - np.float32: pd.Float32Dtype(), - np.complex128: pd.core.dtypes.dtypes.PandasDtype("complex128"), - np.complex64: pd.core.dtypes.dtypes.PandasDtype("complex64"), - # np.float16: pd.Float16Dtype(), -} -dtypeunmap = {v: k for k, v in dtypemap.items()} - - -class UncertaintyArray(ExtensionArray, ExtensionOpsMixin): - """Implements a class to describe an array of physical quantities: - the product of an array of numerical values and a unit of measurement. - - Parameters - ---------- - values : scalar or array-like - Array of mean/nominal values - errors: scalar or array-like - Array of standard deviations/uncertainties - dtype : UncertaintyType or str - Datatype of the underlying values - copy: bool - Whether to copy the values. 
- Returns - ------- - - """ - - _data = np.array([]) - context_name = None - - def __init__( - self, - values, - errors=None, - dtype=None, - copy=False, - ): - if dtype is None: - if isinstance(values, np.ndarray): - dtype = UncertaintyType(value_dtype=values.dtype) - if isinstance(dtype, str): - dtype = UncertaintyType.construct_from_string(dtype) - if not isinstance(dtype, UncertaintyType): - raise NotImplementedError - - self._dtype = dtype - - if isinstance(values, (UncertaintyArray)): - values = values._nom - errors = values._err - elif isinstance(values, (Uncertainty)): - values = values.value - errors = values.error - else: - assert ( - errors is not None - ), "errors must be specified for non UncertaintyArray values" - - if isinstance(values, np.ndarray): - dtype = values.dtype - if dtype in dtypemap: - dtype = dtypemap[dtype] - values = pd.array(values, copy=copy, dtype=dtype) - errors = pd.array(errors, copy=copy, dtype=dtype) - copy = False - elif not isinstance(values, pd.core.arrays.numeric.NumericArray): - values = pd.array(values, copy=copy) - errors = pd.array(errors, copy=copy) - copy = False - if copy: - values = values.copy() - errors = errors.copy() - - self._nom = values - self._err = errors - - def __getstate__(self): - # we need to discard the cached _Q, which is not pickleable - ret = dict(self.__dict__) - return ret - - def __setstate__(self, dct): - self.__dict__.update(dct) - - @property - def dtype(self): - # type: () -> ExtensionDtype - """An instance of 'ExtensionDtype'.""" - return self._dtype - - def __len__(self): - # type: () -> int - """Length of this array - - Returns - ------- - length : int - """ - return len(self._nom) - - def __getitem__(self, item): - # type (Any) -> Any - """Select a subset of self. - Parameters - ---------- - item : int, slice, or ndarray - * int: The position in 'self' to get. - * slice: A slice object, where 'start', 'stop', and 'step' are - integers or None - * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' - Returns - ------- - item : scalar Uncertainty or UncertaintyArray - """ - - if is_integer(item): - return Uncertainty(self._nom[item], self._err[item]) - - item = check_array_indexer(self, item) - - return self.__class__( - self._nom[item], - self._err[item], - ) - - def __setitem__( - self, - key, - value: Union[Uncertainty, UncertaintyArray, Sequence[Uncertainty]], - ): - # need to not use `not value` on numpy arrays - if isinstance(value, (list, tuple)) and (not value): - # doing nothing here seems to be ok - return - - if isinstance(value, self._dtype.type): - val = value.value - err = value.error - - elif ( - is_list_like(value) - and len(value) > 0 - and isinstance(value[0], self._dtype.type) - ): - val = [item.value for item in value] - err = [item.error for item in value] - - key = check_array_indexer(self, key) - try: - self._nom[key] = val - self._err[key] = err - except IndexError as e: - msg = "Mask is wrong length. {}".format(e) - raise IndexError(msg) - - def _formatter(self, boxed=False): - """Formatting function for scalar values. - This is used in the default '__repr__'. The returned formatting - function receives scalar Uncertainties. - - # type: (bool) -> Callable[[Any], Optional[str]] - - Parameters - ---------- - boxed: bool, default False - An indicated for whether or not your array is being printed - within a Series, DataFrame, or Index (True), or just by - itself (False). This may be useful if you want scalar values - to appear differently within a Series versus on its own (e.g. 
- quoted or not). - - Returns - ------- - Callable[[Any], str] - A callable that gets instances of the scalar type and - returns a string. By default, :func:`repr` is used - when ``boxed=False`` and :func:`str` is used when - ``boxed=True``. - """ - - def formatting_function(uncertainty: Uncertainty): - return str(uncertainty) - - return formatting_function - - def isna(self): - # type: () -> np.ndarray - """Return a Boolean NumPy array indicating if each value is missing. - - Returns - ------- - missing : np.array - """ - return ~np.isfinite(self._nom) - - def astype(self, dtype, copy=True): - """Cast to a NumPy array with 'dtype'. - - Parameters - ---------- - dtype : str or dtype - Typecode or data-type to which the array is cast. - copy : bool, default True - Whether to copy the data, even if not necessary. If False, - a copy is made only if the old dtype does not match the - new dtype. - - Returns - ------- - array : ndarray - NumPy ndarray with 'dtype' for its dtype. - """ - if isinstance(dtype, str) and self._dtype._match(dtype): - dtype = self._dtype.construct_from_string(dtype) - - if isinstance(dtype, self._dtype): - if dtype == self._dtype and not copy: - return self - else: - return UncertaintyArray( - self.uncertainty.to(dtype.value_dtype), dtype - ) - - # do *not* delegate to __array__ -> is required to return a numpy array, - # but somebody may be requesting another pandas array - # examples are e.g. PyArrow arrays as requested by "string[pyarrow]" - if is_object_dtype(dtype): - return self._to_array_of_quantity(copy=copy) - if is_string_dtype(dtype): - return pd.array([str(x) for x in self.uncertainty], dtype=dtype) - return pd.array(self.uncertainty, dtype, copy) - - @property - def uncertainty(self): - return Uncertainty(self._nom, self._err) - - def take( - self, - indices: Sequence[int], - allow_fill: bool = False, - fill_value: Uncertainty = None, - ): - """Take elements from an array. - - # type: (Sequence[int], bool, Optional[Any]) -> UncertaintyArray - - Parameters - ---------- - indices : sequence of integers - Indices to be taken. - allow_fill : bool, default False - How to handle negative values in `indices`. - * False: negative values in `indices` indicate positional indices - from the right (the default). This is similar to - :func:`numpy.take`. - * True: negative values in `indices` indicate - missing values. These values are set to `fill_value`. Any other - other negative values raise a ``ValueError``. - fill_value : any, optional - Fill value to use for NA-indices when `allow_fill` is True. - This may be ``None``, in which case the default NA value for - the type, ``self.dtype.na_value``, is used. - - Returns - ------- - UncertaintyArray - - Raises - ------ - IndexError - When the indices are out of bounds for the array. - ValueError - When `indices` contains negative values other than ``-1`` - and `allow_fill` is True. - Notes - ----- - UncertaintyArray.take is called by ``Series.__getitem__``, ``.loc``, - ``iloc``, when `indices` is a sequence of values. Additionally, - it's called by :meth:`Series.reindex`, or any other method - that causes realignemnt, with a `fill_value`. 
- See Also - -------- - numpy.take - pandas.api.extensions.take - Examples - -------- - """ - from pandas.core.algorithms import take - - if allow_fill and fill_value is None: - fill_value = self.dtype.na_value - else: - fill_value = Uncertainty(0, 0) - - value = take( - self._nom, - indices, - fill_value=float(fill_value.value), - allow_fill=allow_fill, - ) - error = take( - self._err, - indices, - fill_value=float(fill_value.error), - allow_fill=allow_fill, - ) - - return UncertaintyArray(value, error, dtype=self.dtype) - - def copy(self, deep: bool = False): - data = (self._nom, self._err) - if deep: - data = copy.deepcopy(data) - else: - data = copy.copy(data) - - return type(self)(*data, dtype=self.dtype) - - @classmethod - def _concat_same_type(cls, to_concat: Sequence[UncertaintyArray]): - v = [] - e = [] - dtype = None - for a in to_concat: - if dtype is None: - dtype = a.dtype - else: - a = a.astype(dtype) - v.append(a._nom) - e.append(a._err) - - return cls(np.concatenate(v), np.concatenate(e), dtype) - - @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): - """ - Initialises a UncertaintyArray from a list like of Uncertainty scalars or a list like of value/error and dtype - ----- - Usage - UncertaintyArray._from_sequence([Uncertainty(1,0),Uncertainty(1,1)]) - """ - - list_of_scalars = [] - for s in scalars: - if isinstance(s, Uncertainty): - list_of_scalars.append(s) - elif isinstance(s, tuple): - list_of_scalars.append(Uncertainty(*s)) - else: - list_of_scalars.append(Uncertainty(s, 0)) - - values = np.asarray([x.value for x in list_of_scalars]) - errors = np.asarray([x.error for x in list_of_scalars]) - - return cls(values, errors, dtype=dtype, copy=copy) - - @classmethod - def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False): - list_of_uncs = [Uncertainty.from_string(x) for x in scalars] - return cls._from_sequence(list_of_uncs, dtype=dtype, copy=copy) - - def value_counts(self, dropna=True): - """ - Returns a Series containing counts of each category. - - Every category will have an entry, even those with a count of 0. - - Parameters - ---------- - dropna : boolean, default True - Don't include counts of NaN. - - Returns - ------- - counts : Series - - See Also - -------- - Series.value_counts - """ - - from pandas import Series - - # compute counts on the data with no nans - data = Uncertainty(self._nom, self._err) - - nafilt = np.isnan(data) - data = data[~nafilt] - - data_list = [Uncertainty(x.value, x.error) for x in data] - index = list(set(data)) - array = [data_list.count(item) for item in index] - - if not dropna: - index.append(np.nan) - array.append(nafilt.sum()) - - return Series(array, index=index) - - def unique(self): - """Compute the UncertaintyArray of unique values. - - Returns - ------- - uniques : UncertaintyArray - """ - from pandas import unique - - data = Uncertainty(self._nom, self._err) - - return self._from_sequence(unique(data), dtype=self.dtype) - - def __contains__(self, item) -> bool: - if not isinstance(item, Uncertainty): - return False - elif pd.isna(item): - return self.isna().any() - else: - return super().__contains__(item) - - @property - def nbytes(self): - return self._nom.nbytes * 2 - - # The _can_hold_na attribute is set to True so that pandas internals - # will use the ExtensionDtype.na_value as the NA value in operations - # such as take(), reindex(), shift(), etc. 
In addition, those results - # will then be of the ExtensionArray subclass rather than an array - # of objects - _can_hold_na = True - - @property - def _ndarray_values(self): - # type: () -> np.ndarray - """Internal pandas method for lossy conversion to a NumPy ndarray. - This method is not part of the pandas interface. - The expectation is that this is cheap to compute, and is primarily - used for interacting with our indexers. - """ - return np.array(self) - - @classmethod - def _create_method(cls, op, coerce_to_dtype=True): - """ - A class method that returns a method that will correspond to an - operator for an ExtensionArray subclass, by dispatching to the - relevant operator defined on the individual elements of the - ExtensionArray. - Parameters - ---------- - op : function - An operator that takes arguments op(a, b) - coerce_to_dtype : bool - boolean indicating whether to attempt to convert - the result to the underlying ExtensionArray dtype - (default True) - Returns - ------- - A method that can be bound to a method of a class - Example - ------- - Given an ExtensionArray subclass called MyExtensionArray, use - >>> __add__ = cls._create_method(operator.add) - in the class definition of MyExtensionArray to create the operator - for addition, that will be based on the operator implementation - of the underlying elements of the ExtensionArray - """ - - def _binop(self: UncertaintyArray, other): - def validate_length(obj1, obj2): - # validates length - try: - if len(obj1) != len(obj2): - raise ValueError("Lengths must match") - except TypeError: - pass - - def convert_values(param): - # convert to a quantity or listlike - if isinstance(param, cls): - return param.uncertainty - elif isinstance(param, Uncertainty): - return param - elif ( - is_list_like(param) - and len(param) > 0 - and isinstance(param[0], Uncertainty) - ): - return param[0] - else: - return param - - if isinstance(other, (Series, DataFrame)): - return NotImplemented - lvalues = self.uncertainty - validate_length(lvalues, other) - rvalues = convert_values(other) - - # If the operator is not defined for the underlying objects, - # a TypeError should be raised - res = op(lvalues, rvalues) - - if coerce_to_dtype: - try: - val = nominal_values(res) - err = std_devs(res) - res = cls(val, err, dtype=self.dtype) - except TypeError: - pass - - return res - - op_name = f"__{op}__" - return set_function_name(_binop, op_name, cls) - - @classmethod - def _create_arithmetic_method(cls, op): - return cls._create_method(op) - - @classmethod - def _create_comparison_method(cls, op): - return cls._create_method(op, coerce_to_dtype=False) - - def __array__(self, dtype=None, copy=False): - if dtype is None or is_object_dtype(dtype): - return self.uncertainty - if is_string_dtype(dtype): - return np.array([str(x) for x in self.uncertainty], dtype=str) - return Uncertainty(self._nom.astype(dtype), self._err.astype(dtype)) - - def searchsorted(self, value, side="left", sorter=None): - """ - Find indices where elements should be inserted to maintain order. - - .. versionadded:: 0.24.0 - - Find the indices into a sorted array `self` (a) such that, if the - corresponding elements in `v` were inserted before the indices, the - order of `self` would be preserved. 
- - Assuming that `a` is sorted: - - ====== ============================ - `side` returned index `i` satisfies - ====== ============================ - left ``self[i-1] < v <= self[i]`` - right ``self[i-1] <= v < self[i]`` - ====== ============================ - - Parameters - ---------- - value : array_like - Values to insert into `self`. - side : {'left', 'right'}, optional - If 'left', the index of the first suitable location found is given. - If 'right', return the last such index. If there is no suitable - index, return either 0 or N (where N is the length of `self`). - sorter : 1-D array_like, optional - Optional array of integer indices that sort array a into ascending - order. They are typically the result of argsort. - - Returns - ------- - indices : array of ints - Array of insertion points with the same shape as `value`. - - See Also - -------- - numpy.searchsorted : Similar method from NumPy. - """ - # Note: the base tests provided by pandas only test the basics. - # We do not test - # 1. Values outside the range of the `data_for_sorting` fixture - # 2. Values between the values in the `data_for_sorting` fixture - # 3. Missing values. - - arr = self._nom - if isinstance(value, Uncertainty): - val = value.value - elif ( - is_list_like(value) - and len(value) > 0 - and isinstance(value[0], Uncertainty) - ): - val = [item.value for item in value] - return arr.searchsorted(val, side=side, sorter=sorter) - - def _reduce(self, name, **kwds): - """ - Return a scalar result of performing the reduction operation. - - Parameters - ---------- - name : str - Name of the function, supported values are: - { any, all, min, max, sum, mean, median, prod, - std, var, sem, kurt, skew }. - skipna : bool, default True - If True, skip NaN values. - **kwargs - Additional keyword arguments passed to the reduction function. - Currently, `ddof` is the only supported kwarg. 
- - Returns - ------- - scalar - - Raises - ------ - TypeError : subclass does not define reductions - """ - - functions = { - "min": np.nanmin, - "max": np.nanmax, - "sum": np.nansum, - "mean": np.nanmean, - "median": np.nanmedian, - "std": np.nanstd, - "var": np.nanvar, - } - if name not in functions: - raise TypeError(f"cannot perform {name} with type {self.dtype}") - - result = functions[name](self.uncertainty, **kwds) - return result - - -UncertaintyArray._add_arithmetic_ops() -UncertaintyArray._add_comparison_ops() -register_extension_dtype(UncertaintyType) - - -@register_dataframe_accessor("uncertainty") -class UncertaintyDataFrameAccessor(object): - def __init__(self, pandas_obj: pd.DataFrame): - self._obj = pandas_obj - - -class UncertaintySeriesAccessor(object): - def __init__(self, pandas_obj: pd.Series): - self._validate(pandas_obj) - self.pandas_obj = pandas_obj - self.uncertainty = pandas_obj.values - self._index = pandas_obj.index - self._name = pandas_obj.name - - @staticmethod - def _validate(obj): - if not is_uncert_type(obj): - raise AttributeError( - "Cannot use 'uncertainty' accessor on objects of " - "dtype '{}'.".format(obj.dtype) - ) - - -class Delegated: - # Descriptor for delegating attribute access to from - # a Series to an underlying array - to_series = True - - def __init__(self, name): - self.name = name - - -class DelegatedProperty(Delegated): - def __get__(self, obj, type=None): - index = object.__getattribute__(obj, "_index") - name = object.__getattribute__(obj, "_name") - result = getattr( - object.__getattribute__(obj, "uncertainty"), self.name - ) - if self.to_series: - if isinstance(result, Uncertainty): - result = UncertaintyArray(result) - return Series(result, index, name=name) - else: - return result - - -class DelegatedScalarProperty(DelegatedProperty): - to_series = False - - -class DelegatedMethod(Delegated): - def __get__(self, obj, type=None): - index = object.__getattribute__(obj, "_index") - name = object.__getattribute__(obj, "_name") - method = getattr( - object.__getattribute__(obj, "uncertainty"), self.name - ) - - def delegated_method(*args, **kwargs): - result = method(*args, **kwargs) - if self.to_series: - if isinstance(result, Uncertainty): - result = UncertaintyArray(result) - result = Series(result, index, name=name) - return result - - return delegated_method - - -class DelegatedScalarMethod(DelegatedMethod): - to_series = False - - -for attr in [ - "debug_used", - "default_format", - "dimensionality", - "dimensionless", - "force_ndarray", - "shape", - "u", - "unitless", - "units", -]: - setattr(UncertaintySeriesAccessor, attr, DelegatedScalarProperty(attr)) -for attr in ["imag", "m", "magnitude", "real"]: - setattr(UncertaintySeriesAccessor, attr, DelegatedProperty(attr)) - -for attr in [ - "check", - "compatible_units", - "format_babel", - "ito", - "ito_base_units", - "ito_reduced_units", - "ito_root_units", - "plus_minus", - "put", - "to_tuple", - "tolist", -]: - setattr(UncertaintySeriesAccessor, attr, DelegatedScalarMethod(attr)) -for attr in [ - "clip", - "from_tuple", - "m_as", - "searchsorted", - "to", - "to_base_units", - "to_compact", - "to_reduced_units", - "to_root_units", - "to_timedelta", -]: - setattr(UncertaintySeriesAccessor, attr, DelegatedMethod(attr)) - - -def is_uncert_type(obj): - t = getattr(obj, "dtype", obj) - try: - return isinstance(t, UncertaintyType) or issubclass(t, UncertaintyType) - except Exception: - return False - - -# try: -# # for pint < 0.21 we need to explicitly register -# 
compat.upcast_types.append(UncertaintyArray) -# except AttributeError: -# # for pint = 0.21 we need to add the full names of UncertaintyArray and DataFrame, -# # which is to be added in pint > 0.21 -# compat.upcast_type_map.setdefault("pint_pandas.pint_array.UncertaintyArray", UncertaintyArray) -# compat.upcast_type_map.setdefault("pandas.core.frame.DataFrame", DataFrame) diff --git a/auto_uncertainties/uncertainty.py b/auto_uncertainties/uncertainty.py deleted file mode 100644 index fd26c8d..0000000 --- a/auto_uncertainties/uncertainty.py +++ /dev/null @@ -1,614 +0,0 @@ -# -*- coding: utf-8 -*- -# Based heavily on the implementation of pint's Quantity object -from __future__ import annotations - -import copy -import locale -import operator -import warnings - -import joblib -import numpy as np -from typing_extensions import Generic, TypeVar - -from . import NegativeStdDevError, NumpyDowncastWarning -from .util import ( - Display, - ignore_numpy_downcast_warnings, - ignore_runtime_warnings, - is_np_duck_array, - strip_device_array, -) -from .wrap_numpy import HANDLED_FUNCTIONS, HANDLED_UFUNCS, wrap_numpy - - -def _check_units(value, err): - mag_has_units = hasattr(value, "units") - mag_units = getattr(value, "units", None) - err_has_units = hasattr(err, "units") - err_units = getattr(err, "units", None) - - if mag_has_units and mag_units is not None: - Q = mag_units._REGISTRY.Quantity - ret_val = Q(value).to(mag_units).m - if err is not None: - ret_err = Q(err).to(mag_units).m - else: - ret_err = None - ret_units = mag_units - # This branch will never actually work, but its here - # to raise a Dimensionality error without needing to import pint - elif err_has_units: - Q = err_units._REGISTRY.Quantity - ret_val = Q(value).to(err_units).m - ret_err = Q(err).to(err_units).m - ret_units = err_units - else: - ret_units = None - ret_val = value - ret_err = err - - return ret_val, ret_err, ret_units - - -T = TypeVar("T") - - -class Uncertainty(Display, Generic[T]): - __apply_to_both_ndarray__ = [ - "flatten", - "real", - "imag", - "astype", - "T", - "reshape", - ] - __ndarray_attributes__ = ["dtype", "ndim", "size"] - - __array_priority__ = 18 - - _nom: T - _err: T - - @ignore_numpy_downcast_warnings - def __init__(self, value: T, err: T | None = None): - if hasattr(value, "units") or hasattr(err, "units"): - raise NotImplementedError( - "Uncertainty cannot have units! Call Uncertainty.from_quantities instead." 
- ) - - value_ = strip_device_array(value) - if err is not None: - err_ = strip_device_array(err) - else: - err_ = None - - # If Uncertatity - if isinstance(value_, self.__class__): - magnitude_nom = value_.value - magnitude_err = value_.error - # If sequence - elif isinstance(value_, list): - inst = self.__class__.from_list(value_) - magnitude_nom = inst.value - magnitude_err = inst.error - # If arrays - elif np.ndim(value_) > 0: - magnitude_nom = np.asarray(value_) - if err_ is None: - magnitude_err = np.zeros_like(value_) - else: - if np.ndim(err_) == 0: - magnitude_err = np.ones_like(value_) * err_ - else: - magnitude_err = np.asarray(err_) - assert magnitude_err.shape == magnitude_nom.shape - # If scalar - else: - magnitude_nom = value_ - if err_ is None: - magnitude_err = 0.0 - else: - magnitude_err = err_ - - # Replace NaNs in errors with zeros - if is_np_duck_array(type(magnitude_err)): - magnitude_err[~np.isfinite(magnitude_err)] = 0 - else: - if not np.isfinite(magnitude_err): - magnitude_err = 0 - - # Basic sanity checks - if is_np_duck_array(type(magnitude_nom)): - match_items = self.__ndarray_attributes__ + ["shape"] - try: - match_items.remove("dtype") - except ValueError: - pass - for item in match_items: - if not getattr(magnitude_nom, item) == getattr( - magnitude_err, item - ): - raise ValueError( - f"Attribute {item} does not match for value and error! ({getattr(magnitude_nom,item)} and {getattr(magnitude_err,item)})" - ) - err_mag = np.atleast_1d(magnitude_err) - if np.any(err_mag[np.isfinite(err_mag)] < 0): - valid = err_mag[np.isfinite(err_mag)] - - raise NegativeStdDevError( - f"Found negatives values for the standard deviation... {valid[valid < 0]}" - ) - - self._nom = magnitude_nom - self._err = magnitude_err - - def __bytes__(self) -> bytes: - return str(self).encode(locale.getpreferredencoding()) - - def __iter__(self): - for v, e in zip(self._nom, self._err): - yield self.__class__(v, e) - - def __copy__(self) -> Uncertainty[T]: - ret = self.__class__(copy.copy(self._nom), copy.copy(self._err)) - - return ret - - def __deepcopy__(self, memo) -> Uncertainty[T]: - ret = self.__class__( - copy.deepcopy(self._nom, memo), copy.deepcopy(self._err, memo) - ) - return ret - - def __hash__(self) -> int: - digest = joblib.hash((self._nom, self._err), hash_name="sha1") - return int.from_bytes(bytes(digest, encoding="utf-8"), "big") - - @property - def value(self): - return self._nom - - @property - def error(self): - return self._err - - @property - def relative(self): - if np.ndim(self._nom) == 0: - try: - return self._err / self._nom - except OverflowError: - return np.inf - except ZeroDivisionError: - return np.NaN - else: - rel = np.zeros_like(self._nom) - valid = np.isfinite(self._nom) & (self._nom > 0) - rel[valid] = self._err[valid] / self._nom[valid] - return rel - - @property - def rel(self): - return self.relative - - @property - def rel2(self): - try: - return self.relative**2 - except OverflowError: - return np.inf - - def plus_minus(self, err: float): - val = self._nom - old_err = self._err - new_err = np.sqrt(old_err**2 + err**2) - - return self.__class__(val, new_err) - - @classmethod - def from_string(cls, string: str): - new_str = string.replace("+/-", "±") - new_str = new_str.replace("+-", "±") - if "±" not in new_str: - return Uncertainty(float(string)) - else: - u1, u2 = new_str.split("±") - return cls(float(u1), float(u2)) - - @classmethod - def from_quantities(cls, value, err): - value_, err_, units = _check_units(value, err) - inst = cls(value_, 
err_) - if units is not None: - inst *= units - return inst - - @classmethod - def from_list(cls, u_list): - return cls.from_sequence(u_list) - - @classmethod - def from_sequence(cls, seq): - _ = iter(seq) - - len_seq = len(seq) - val = np.empty(len_seq) - err = np.empty(len_seq) - if len_seq > 0: - first_item = seq[0] - try: - first_item + 1 - except TypeError: - raise TypeError( - f"Sequence elements of type {type(first_item)} dont support math operations!" - ) - if hasattr(first_item, "units"): - val *= first_item.units - err *= first_item.units - for i, seq_i in enumerate(seq): - try: - val[i] = float(seq_i._nom) - err[i] = float(seq_i._err) - except AttributeError: - val[i] = float(seq_i) - err[i] = 0 - - return cls(val, err) - - def __float__(self) -> Uncertainty[float]: - return float(self._nom) - - def __complex__(self) -> Uncertainty[complex]: - return complex(self._nom) - - def __int__(self) -> Uncertainty[int]: - return int(self._nom) - - # Math Operators - def __add__(self, other): - if isinstance(other, Uncertainty): - new_mag = self._nom + other._nom - new_err = np.sqrt(self._err**2 + other._err**2) - else: - new_mag = self._nom + other - new_err = self._err - try: - return self.__class__(new_mag, new_err) - except NotImplementedError: - return NotImplemented - - __radd__ = __add__ - - def __sub__(self, other): - if isinstance(other, Uncertainty): - new_mag = self._nom - other._nom - new_err = np.sqrt(self._err**2 + other._err**2) - else: - new_mag = self._nom - other - new_err = self._err - try: - return self.__class__(new_mag, new_err) - except NotImplementedError: - return NotImplemented - - def __rsub__(self, other): - return -self.__sub__(other) - - def __mul__(self, other): - if isinstance(other, Uncertainty): - new_mag = self._nom * other._nom - new_err = np.abs(new_mag) * np.sqrt(self.rel2 + other.rel2) - else: - new_mag = self._nom * other - new_err = np.abs(self._err * other) - try: - return self.__class__(new_mag, new_err) - except NotImplementedError: - return NotImplemented - - __rmul__ = __mul__ - - @ignore_runtime_warnings - def __truediv__(self, other): - if isinstance(other, Uncertainty): - new_mag = self._nom / other._nom - new_err = np.abs(new_mag) * np.sqrt(self.rel2 + other.rel2) - else: - new_mag = self._nom / other - new_err = np.abs(self._err / other) - try: - return self.__class__(new_mag, new_err) - except NotImplementedError: - return NotImplemented - - @ignore_runtime_warnings - def __rtruediv__(self, other): - # Other / Self - if isinstance(other, Uncertainty): - raise Exception - else: - new_mag = other / self._nom - new_err = np.abs(new_mag) * np.abs(self.rel) - try: - return self.__class__(new_mag, new_err) - except NotImplementedError: - return NotImplemented - - __div__ = __truediv__ - __rdiv__ = __rtruediv__ - - def __floordiv__(self, other): - if isinstance(other, Uncertainty): - new_mag = self._nom // other._nom - new_err = 0.0 - else: - new_mag = self._nom // other - new_err = 0.0 - return self.__class__(new_mag, new_err) - - def __rfloordiv__(self, other): - if isinstance(other, Uncertainty): - return other.__truediv__(self) - else: - new_mag = other // self._nom - new_err = 0.0 - return self.__class__(new_mag, new_err) - - def __mod__(self, other): - if isinstance(other, Uncertainty): - new_mag = self._nom % other._nom - else: - new_mag = self._nom % other - if np.ndim(new_mag) == 0: - new_err = 0.0 - else: - new_err = np.zeros_like(new_mag) - return self.__class__(new_mag, new_err) - - def __rmod__(self, other): - new_mag = other % 
self._nom - if np.ndim(new_mag) == 0: - new_err = 0.0 - else: - new_err = np.zeros_like(new_mag) - return self.__class__(new_mag, new_err) - - def __divmod__(self, other): - return self // other, self % other - - def __rdivmod__(self, other): - return other // self, other % self - - @ignore_runtime_warnings - def __pow__(self, other): - # Self ** other - A = self._nom - sA = self._err - if isinstance(other, Uncertainty): - B = other._nom - else: - B = other - new_mag = A**B - new_err = np.abs(new_mag) * np.sqrt((B / A * sA) ** 2) - - return self.__class__(new_mag, new_err) - - @ignore_runtime_warnings - def __rpow__(self, other): - # Other ** self - B = self._nom - sB = self._err - if isinstance(other, Uncertainty): - A = other._nom - sA = other._err - else: - A = other - sA = 0 - - new_mag = A**B - new_err = np.abs(new_mag) * np.sqrt( - (B / A * sA) ** 2 + (np.log(A) * sB) ** 2 - ) - - return self.__class__(new_mag, new_err) - - def __abs__(self): - return self.__class__(abs(self._nom), self._err) - - def __round__(self, ndigits): - return self.__class__(round(self._nom, ndigits=ndigits), self._err) - - def __pos__(self): - return self.__class__(operator.pos(self._nom), self._err) - - def __neg__(self): - return self.__class__(operator.neg(self._nom), self._err) - - def __eq__(self, other): - if isinstance(other, Uncertainty): - return self._nom == other._nom - else: - return self._nom == other - - def __ne__(self, other): - out = self.__eq__(other) - if is_np_duck_array(type(out)): - return np.logical_not(out) - else: - return not out - - def compare(self, other, op): - if isinstance(other, Uncertainty): - return op(self._nom, other._nom) - else: - return op(self._nom, other) - - __lt__ = lambda self, other: self.compare( # noqa: E731 - other, op=operator.lt - ) - __le__ = lambda self, other: self.compare( # noqa: E731 - other, op=operator.le - ) - __ge__ = lambda self, other: self.compare( # noqa: E731 - other, op=operator.ge - ) - __gt__ = lambda self, other: self.compare( # noqa: E731 - other, op=operator.gt - ) - - def __bool__(self) -> bool: - return bool(self._nom) - - __nonzero__ = __bool__ - - # NumPy function/ufunc support - @ignore_runtime_warnings - def __array_function__(self, func, types, args, kwargs): - if func.__name__ not in HANDLED_FUNCTIONS: - return NotImplemented - elif not any(issubclass(t, self.__class__) for t in types): - return NotImplemented - else: - return wrap_numpy("function", func, args, kwargs) - - @ignore_runtime_warnings - def __array_ufunc__(self, ufunc, method, *args, **kwargs): - if method != "__call__": - raise NotImplementedError - else: - if ufunc.__name__ not in HANDLED_UFUNCS: - raise NotImplementedError( - f"Ufunc {ufunc.__name__} is not implemented!" - ) from None - else: - return wrap_numpy("ufunc", ufunc, args, kwargs) - - def __getattr__(self, item): - if item.startswith("__array_"): - # Handle array protocol attributes other than `__array__` - raise AttributeError( - f"Array protocol attribute {item} not available." 
- ) - elif item in self.__apply_to_both_ndarray__: - val = getattr(self._nom, item) - err = getattr(self._err, item) - - if callable(val): - return lambda *args, **kwargs: self.__class__( - val(*args, **kwargs), err(*args, **kwargs) - ) - else: - return self.__class__(val, err) - elif item in HANDLED_UFUNCS: - return lambda *args, **kwargs: wrap_numpy( - "ufunc", item, [self] + list(args), kwargs - ) - elif item in HANDLED_FUNCTIONS: - return lambda *args, **kwargs: wrap_numpy( - "function", item, [self] + list(args), kwargs - ) - elif item in self.__ndarray_attributes__: - return getattr(self._nom, item) - else: - raise AttributeError( - f"Attribute {item} not available in Uncertainty, or as NumPy ufunc or function." - ) from None - - def __array__(self, t=None) -> np.ndarray: - warnings.warn( - "The uncertainty is stripped when downcasting to ndarray.", - NumpyDowncastWarning, - stacklevel=2, - ) - return np.asarray(self._nom) - - def clip(self, min=None, max=None, out=None, **kwargs): - return self.__class__( - self._nom.clip(min, max, out, **kwargs), self._err - ) - - def fill(self, value) -> None: - return self._nom.fill(value) - - def put(self, indices, values, mode="raise") -> None: - if isinstance(values, self.__class__): - self._nom.put(indices, values._nom, mode) - self._err.put(indices, values._err, mode) - else: - raise ValueError( - "Can only 'put' Uncertainties into uncertainties!" - ) - - def copy(self): - return Uncertainty(self._nom.copy(), self._err.copy()) - - # Special properties - @property - def flat(self): - for u, v in (self._nom.flat, self._err.flat): - yield self.__class__(u, v) - - @property - def shape(self): - return self._nom.shape - - @shape.setter - def shape(self, value): - self._nom.shape = value - self._err.shape = value - - @property - def nbytes(self): - return self._nom.nbytes + self._err.nbytes - - def searchsorted(self, v, side="left", sorter=None): - return self._nom.searchsorted(v, side) - - def __len__(self) -> int: - return len(self._nom) - - def __getitem__(self, key): - try: - return self.__class__(self._nom[key], self._err[key]) - except TypeError: - raise TypeError(f"Index {key} not supported!") - - def __setitem__(self, key, value): - if not isinstance(value, self.__class__): - raise ValueError( - f"Can only pass Uncertainty type to __setitem__! Instead passed {type(value)}" - ) - try: - _ = self._nom[key] - except ValueError as exc: - raise ValueError( - f"Object {type(self._nom)} does not support indexing" - ) from exc - - self._nom[key] = value._nom - self._err[key] = value._err - - def tolist(self): - try: - nom = self._nom.tolist() - err = self._err.tolist() - if not isinstance(nom, list): - return self.__class__(nom, err) - else: - return [ - ( - self.__class__(n, e).tolist() - if isinstance(n, list) - else self.__class__(n, e) - ) - for n, e in (nom, err) - ] - except AttributeError: - raise AttributeError( - f"{type(self._nom).__name__}' does not support tolist." 
- ) - - @property - def ndim(self): - return np.ndim(self._nom) diff --git a/auto_uncertainties/uncertainty/__init__.py b/auto_uncertainties/uncertainty/__init__.py new file mode 100644 index 0000000..98f8c88 --- /dev/null +++ b/auto_uncertainties/uncertainty/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import lazy_loader + + +__getattr__, __dir__, __all__ = lazy_loader.attach_stub(__name__, __file__) + +__all__ = [ + "ScalarUncertainty", + "Uncertainty", + "VectorUncertainty", + "nominal_values", + "set_compare_error", + "set_downcast_error", + "std_devs", + "uncertainty_containers", +] diff --git a/auto_uncertainties/uncertainty/__init__.pyi b/auto_uncertainties/uncertainty/__init__.pyi new file mode 100644 index 0000000..71b9b99 --- /dev/null +++ b/auto_uncertainties/uncertainty/__init__.pyi @@ -0,0 +1,22 @@ +from . import uncertainty_containers + +from .uncertainty_containers import ( + ScalarUncertainty, + Uncertainty, + VectorUncertainty, + nominal_values, + set_compare_error, + set_downcast_error, + std_devs, +) + +__all__ = [ + "ScalarUncertainty", + "Uncertainty", + "VectorUncertainty", + "nominal_values", + "set_compare_error", + "set_downcast_error", + "std_devs", + "uncertainty_containers", +] diff --git a/auto_uncertainties/uncertainty/uncertainty_containers.py b/auto_uncertainties/uncertainty/uncertainty_containers.py new file mode 100644 index 0000000..243737e --- /dev/null +++ b/auto_uncertainties/uncertainty/uncertainty_containers.py @@ -0,0 +1,876 @@ +# Based heavily on the implementation of pint's Quantity object +from __future__ import annotations + +from collections.abc import Sequence +import copy +import locale +import math +import operator +from typing import Generic, TypeVar +import warnings + +import joblib +import numpy as np +from numpy.typing import NDArray + +from auto_uncertainties import ( + DowncastError, + DowncastWarning, + NegativeStdDevError, +) +from auto_uncertainties.display_format import ScalarDisplay, VectorDisplay +from auto_uncertainties.numpy import HANDLED_FUNCTIONS, HANDLED_UFUNCS, wrap_numpy +from auto_uncertainties.util import ( + ignore_numpy_downcast_warnings, + ignore_runtime_warnings, +) + +ERROR_ON_DOWNCAST = False +COMPARE_RTOL = 1e-9 + +__all__ = [ + "Uncertainty", + "VectorUncertainty", + "ScalarUncertainty", + "set_downcast_error", + "set_compare_error", + "nominal_values", + "std_devs", +] + + +def set_downcast_error(val: bool): + """Set whether errors occur when uncertainty is stripped""" + global ERROR_ON_DOWNCAST + ERROR_ON_DOWNCAST = val + + +def set_compare_error(val: float): + global COMPARE_RTOL + COMPARE_RTOL = val + + +def _check_units(value, err): + mag_has_units = hasattr(value, "units") + mag_units = getattr(value, "units", None) + err_has_units = hasattr(err, "units") + err_units = getattr(err, "units", None) + + if mag_has_units and mag_units is not None: + Q = mag_units._REGISTRY.Quantity + ret_val = Q(value).to(mag_units).m + ret_err = Q(err).to(mag_units).m if err is not None else None + ret_units = mag_units + # This branch will never actually work, but its here + # to raise a Dimensionality error without needing to import pint + elif err_has_units: + Q = err_units._REGISTRY.Quantity # type: ignore + ret_val = Q(value).to(err_units).m + ret_err = Q(err).to(err_units).m + ret_units = err_units + else: + ret_units = None + ret_val = value + ret_err = err + + return ret_val, ret_err, ret_units + + +def nominal_values(x) -> T: + """Return the central value of an Uncertainty object if it is one, 
otherwise returns the object""" + # Is an Uncertainty + if hasattr(x, "_nom"): + return x.value + else: + if np.ndim(x) > 0: + try: + x2 = Uncertainty.from_sequence(x) + except Exception: + return x + else: + return x2.value + else: + try: + x2 = Uncertainty(x) + except Exception: + return x + else: + if isinstance(x2, float): + return x2 + else: + return x2.value + + +def std_devs(x) -> T: + """Return the uncertainty of an Uncertainty object if it is one, otherwise returns zero""" + # Is an Uncertainty + if hasattr(x, "_err"): + return x.error + else: + if np.ndim(x) > 0: + try: + x2 = Uncertainty.from_sequence(x) + except Exception: + return np.zeros_like(x) + else: + return x2.error + else: + try: + x2 = Uncertainty(x) + except Exception: + return 0 + else: + if isinstance(x2, float): + return 0 + else: + return x2.error + + +ST = TypeVar("ST", float, int) +T = TypeVar("T", NDArray, float, int) + + +class Uncertainty(Generic[T]): + """Base class for Uncertainty objects + + + Parameters + ---------- + + value : + The central value(s) + + err: + The uncertainty value(s). Zero if not provided. Negative numbers raise a RuntimeError. + + """ + + _nom: T + _err: T + + def __getstate__(self): + return {"nominal_value": self._nom, "std_devs": self._err} + + def __setstate__(self, state): + self._nom = state["nominal_value"] + self._err = state["std_devs"] + + def __getnewargs__(self): + return (self._nom, self._err) + + @ignore_numpy_downcast_warnings + def __new__(cls: type[Uncertainty], value: T | Uncertainty, err=None): + # If instantiated with an Uncertainty subclass + if isinstance(value, ScalarUncertainty | VectorUncertainty): + err = value.error + value = value.value + # If instantiated with a list or tuple of uncertainties + elif isinstance(value, list | tuple): + inst = cls.from_list(value) + value = inst.value + err = inst.error + + nan = False + # Numpy arrays + if np.ndim(value) > 0: + vector = True + # Zero error + if err is None: + err = np.zeros_like(value) + else: + # Constant error + if np.ndim(err) == 0: + err = np.ones_like(value) * err + else: + assert np.ndim(value) == np.ndim(err) + assert np.shape(value) == np.shape(err) + # replace NaN with zero in errors + err[~np.isfinite(err)] = 0 + + if np.any(err < 0): + msg = f"Found {np.count_nonzero(err < 0)} negative values for the standard deviation!" + raise NegativeStdDevError(msg) + else: + vector = False + if np.isfinite(value): + nan = False + if np.isfinite(err) and err < 0: + msg = f"Found negative value ({err}) for the standard deviation!" + raise NegativeStdDevError(msg) + elif err is None or not np.isfinite(err): + err = 0.0 + else: + nan = True + + if nan: + inst = np.nan + else: + if vector: + inst = object.__new__(VectorUncertainty) + else: + inst = object.__new__(ScalarUncertainty) + + inst.__init__(value, err, trigger=True) + return inst + + def __init__(self, value: T, err: T | None, *, trigger=False): + if trigger: + if hasattr(value, "units") or hasattr(err, "units"): + msg = "Uncertainty cannot have units! Call Uncertainty.from_quantities instead." 
+ raise NotImplementedError(msg) + + self._nom = value + self._err = err + + def __copy__(self) -> Uncertainty[T]: + return self.__class__(copy.copy(self._nom), copy.copy(self._err)) + + def __deepcopy__(self, memo) -> Uncertainty[T]: + return self.__class__( + copy.deepcopy(self._nom, memo), copy.deepcopy(self._err, memo) + ) + + @property + def value(self) -> T: + """The central value of the Uncertainty object""" + return self._nom + + @property + def error(self) -> T: + """The uncertainty value of the Uncertainty object""" + return self._err + + @property + def relative(self) -> T: + """The relative uncertainty of the Uncertainty object""" + raise NotImplementedError + + @property + def rel(self) -> T: + """Alias for relative property""" + return self.relative + + @property + def rel2(self) -> T: + """The square of the relative uncertainty of the Uncertainty object""" + raise NotImplementedError + + def plus_minus(self, err: T): + """Add an error to the Uncertainty object""" + val = self._nom + old_err = self._err + new_err = np.sqrt(old_err**2 + err**2) + + return self.__class__(val, new_err) + + @classmethod + def from_string(cls, string: str): + """Create an Uncertainty object from a string representation of the value and error. + + Parameters + ---------- + string : str + A string representation of the value and error. The error can be represented as "+/-" or "±". For instance, 5.0 +- 1.0 or 5.0 ± 1.0. + """ + new_str = string.replace("+/-", "±") + new_str = new_str.replace("+-", "±") + if "±" not in new_str: + return Uncertainty(float(string)) + else: + u1, u2 = new_str.split("±") + return Uncertainty(float(u1), float(u2)) + + @classmethod + def from_quantities(cls, value, err): + """Create an Uncertainty object from two `Pint` quantities + + Parameters + ---------- + value : pint.Quantity + The central value of the Uncertainty object + err : pint.Quantity + The uncertainty value of the Uncertainty object + """ + value_, err_, units = _check_units(value, err) + inst = cls(value_, err_) + if units is not None: + inst *= units + return inst + + @classmethod + def from_list(cls, u_list: Sequence[Uncertainty]): + """Create an Uncertainty object from a list of Uncertainty objects + + Parameters + ---------- + u_list : + A list of Uncertainty objects + """ + return cls.from_sequence(u_list) + + @classmethod + def from_sequence(cls, seq: Sequence[Uncertainty]): + """Create an Uncertainty object from a sequence of Uncertainty objects + + Parameters + ---------- + seq : + A list of Uncertainty objects + """ + _ = iter(seq) + + len_seq = len(seq) + val = np.empty(len_seq) + err = np.empty(len_seq) + if len_seq > 0: + first_item = seq[0] + try: + first_item + 1 + except TypeError: + msg = f"Sequence elements of type {type(first_item)} dont support math operations!" 
+ raise TypeError(msg) from None + if hasattr(first_item, "units"): + val *= first_item.units + err *= first_item.units + for i, seq_i in enumerate(seq): + try: + val[i] = float(seq_i._nom) + err[i] = float(seq_i._err) + except AttributeError: + val[i] = float(seq_i) + err[i] = 0 + + return cls(val, err) + + _HANDLED_TYPES = (np.ndarray, float, int) + + # Math Operators + def __add__(self, other): + if isinstance(other, Uncertainty): + new_mag = self._nom + other._nom + new_err = np.sqrt(self._err**2 + other._err**2) + elif isinstance(other, self._HANDLED_TYPES): + new_mag = self._nom + other + new_err = self._err + else: + return NotImplemented + try: + return self.__class__(new_mag, new_err) + except NotImplementedError: + return NotImplemented + + __radd__ = __add__ + + def __sub__(self, other): + if isinstance(other, Uncertainty): + new_mag = self._nom - other._nom + new_err = np.sqrt(self._err**2 + other._err**2) + elif isinstance(other, self._HANDLED_TYPES): + new_mag = self._nom - other + new_err = self._err + else: + return NotImplemented + try: + return self.__class__(new_mag, new_err) + except NotImplementedError: + return NotImplemented + + def __rsub__(self, other): + return -self.__sub__(other) + + def __mul__(self, other): + if isinstance(other, Uncertainty): + new_mag = self._nom * other._nom + new_err = np.abs(new_mag) * np.sqrt(self.rel2 + other.rel2) + elif isinstance(other, self._HANDLED_TYPES): + new_mag = self._nom * other + new_err = np.abs(self._err * other) + else: + return NotImplemented + try: + return self.__class__(new_mag, new_err) + except NotImplementedError: + return NotImplemented + + __rmul__ = __mul__ + + @ignore_runtime_warnings + def __truediv__(self, other): + if isinstance(other, Uncertainty): + new_mag = self._nom / other._nom + new_err = np.abs(new_mag) * np.sqrt(self.rel2 + other.rel2) + elif isinstance(other, self._HANDLED_TYPES): + new_mag = self._nom / other + new_err = np.abs(self._err / other) + else: + return NotImplemented + try: + return self.__class__(new_mag, new_err) + except NotImplementedError: + return NotImplemented + + @ignore_runtime_warnings + def __rtruediv__(self, other): + # Other / Self + if isinstance(other, Uncertainty): + raise TypeError + elif isinstance(other, self._HANDLED_TYPES): + new_mag = other / self._nom + new_err = np.abs(new_mag) * np.abs(self.rel) + else: + return NotImplemented + try: + return self.__class__(new_mag, new_err) + except NotImplementedError: + return NotImplemented + + __div__ = __truediv__ + __rdiv__ = __rtruediv__ + + def __floordiv__(self, other): + if isinstance(other, Uncertainty): + new_mag = self._nom // other._nom + elif isinstance(other, self._HANDLED_TYPES): + new_mag = self._nom // other + else: + return NotImplemented + new_err = self.__div__(other).error + + return self.__class__(new_mag, new_err) + + def __rfloordiv__(self, other): + if isinstance(other, Uncertainty): + return other.__floordiv__(self) + elif isinstance(other, self._HANDLED_TYPES): + new_mag = other // self._nom + new_err = self.__rdiv__(other).error + + return self.__class__(new_mag, new_err) + else: + return NotImplemented + + def __mod__(self, other): + if isinstance(other, Uncertainty): + new_mag = self._nom % other._nom + elif isinstance(other, self._HANDLED_TYPES): + new_mag = self._nom % other + else: + return NotImplemented + new_err = 0.0 if np.ndim(new_mag) == 0 else np.zeros_like(new_mag) + return self.__class__(new_mag, new_err) + + def __rmod__(self, other): + new_mag = other % self._nom + if 
np.ndim(new_mag) == 0: + new_err = 0.0 + elif isinstance(other, self._HANDLED_TYPES): + new_err = np.zeros_like(new_mag) + else: + return NotImplemented + return self.__class__(new_mag, new_err) + + def __divmod__(self, other): + return self // other, self % other + + def __rdivmod__(self, other): + return other // self, other % self + + @ignore_runtime_warnings + def __pow__(self, other): + # Self ** other + A = self._nom + sA = self._err + if isinstance(other, Uncertainty): + B = other._nom + sB = other._err + + elif isinstance(other, self._HANDLED_TYPES): + B = other + sB = 0 + else: + return NotImplemented + + new_mag = A**B + new_err = new_err = np.abs(new_mag) * np.sqrt( + (B / A * sA) ** 2 + (np.log(np.abs(A)) * sB) ** 2 + ) + + return self.__class__(new_mag, new_err) + + @ignore_runtime_warnings + def __rpow__(self, other): + # Other ** self + B = self._nom + sB = self._err + if isinstance(other, Uncertainty): + A = other._nom + sA = other._err + elif isinstance(other, self._HANDLED_TYPES): + A = other + sA = 0 + else: + return NotImplemented + new_mag = A**B + new_err = np.abs(new_mag) * np.sqrt( + (B / A * sA) ** 2 + (np.log(np.abs(A)) * sB) ** 2 + ) + + return self.__class__(new_mag, new_err) + + def __abs__(self): + return self.__class__(abs(self._nom), self._err) + + def __pos__(self): + return self.__class__(operator.pos(self._nom), self._err) + + def __neg__(self): + return self.__class__(operator.neg(self._nom), self._err) + + def _compare(self, other, op): + if isinstance(other, Uncertainty): + return op(self._nom, other._nom) + else: + return op(self._nom, other) + + __lt__ = lambda self, other: self._compare( # noqa: E731 + other, op=operator.lt + ) + __le__ = lambda self, other: self._compare( # noqa: E731 + other, op=operator.le + ) + __ge__ = lambda self, other: self._compare( # noqa: E731 + other, op=operator.ge + ) + __gt__ = lambda self, other: self._compare( # noqa: E731 + other, op=operator.gt + ) + + def __bool__(self) -> bool: + return bool(self._nom) + + __nonzero__ = __bool__ + + # NumPy function/ufunc support + @ignore_runtime_warnings + def __array_function__(self, func, types, args, kwargs): + if func.__name__ not in HANDLED_FUNCTIONS or not any( + issubclass(t, self.__class__) for t in types + ): + return NotImplemented + else: + return wrap_numpy("function", func, args, kwargs) + + @ignore_runtime_warnings + def __array_ufunc__(self, ufunc, method, *args, **kwargs): + if method != "__call__": + raise NotImplementedError + else: + if ufunc.__name__ not in HANDLED_UFUNCS: + msg = f"Ufunc {ufunc.__name__} is not implemented!" + raise NotImplementedError(msg) from None + else: + return wrap_numpy("ufunc", ufunc, args, kwargs) + + def __getattr__(self, item): + if item.startswith("__array_"): + # Handle array protocol attributes other than `__array__` + msg = f"Array protocol attribute {item} not available." + raise AttributeError(msg) + elif item in HANDLED_UFUNCS: + return lambda *args, **kwargs: wrap_numpy( + "ufunc", item, [self, *list(args)], kwargs + ) + elif item in HANDLED_FUNCTIONS: + return lambda *args, **kwargs: wrap_numpy( + "function", item, [self, *list(args)], kwargs + ) + else: + msg = f"Attribute {item} not available in Uncertainty, or as NumPy ufunc or function." 
+ raise AttributeError(msg) from None + + +class VectorUncertainty(VectorDisplay, Uncertainty[np.ndarray]): + __apply_to_both_ndarray__ = ( + "flatten", + "real", + "imag", + "astype", + "T", + "reshape", + ) + __ndarray_attributes__ = ("dtype", "ndim", "size") + + __array_priority__ = 18 + + # More numpy capabilities exposed here + def __getattr__(self, item): + if item.startswith("__array_"): + # Handle array protocol attributes other than `__array__` + msg = f"Array protocol attribute {item} not available." + raise AttributeError(msg) + elif item in self.__apply_to_both_ndarray__: + val = getattr(self._nom, item) + err = getattr(self._err, item) + + if callable(val): + return lambda *args, **kwargs: self.__class__( + val(*args, **kwargs), err(*args, **kwargs) + ) + else: + return self.__class__(val, err) + elif item in HANDLED_UFUNCS: + return lambda *args, **kwargs: wrap_numpy( + "ufunc", item, [self, *list(args)], kwargs + ) + elif item in HANDLED_FUNCTIONS: + return lambda *args, **kwargs: wrap_numpy( + "function", item, [self, *list(args)], kwargs + ) + elif item in self.__ndarray_attributes__: + return getattr(self._nom, item) + else: + msg = f"Attribute {item} not available in Uncertainty, or as NumPy ufunc or function." + raise AttributeError(msg) from None + + def __init__(self, value: T, err: T | None = None, *, trigger=False): + if trigger: + super().__init__(value=value, err=err, trigger=trigger) + if np.ndim(self._nom) == 0: + msg = "VectorUncertainty must have a dimension greater than 0!" + raise ValueError(msg) + + def __ne__(self, other): + out = self.__eq__(other) + return np.logical_not(out) + + def __bytes__(self) -> bytes: + return str(self).encode(locale.getpreferredencoding()) + + def __iter__(self): + for v, e in zip(self._nom, self._err, strict=False): + yield self.__class__(v, e) + + def __eq__(self, other): + if isinstance(other, Uncertainty): + ret = self._nom == other._nom + else: + ret = self._nom == other + return ret + + @property + def relative(self): + rel = np.zeros_like(self._nom) + valid = np.isfinite(self._nom) & (self._nom > 0) + rel[valid] = self._err[valid] / self._nom[valid] + return rel + + @property + def rel2(self): + return self.relative**2 + + def __round__(self, ndigits): + return self.__class__(np.round(self._nom, decimals=ndigits), self._err) + + def __array__(self, t=None) -> np.ndarray: + if ERROR_ON_DOWNCAST: + msg = "The uncertainty is stripped when downcasting to ndarray." + raise DowncastError(msg) + else: + warnings.warn( + "The uncertainty is stripped when downcasting to ndarray.", + DowncastWarning, + stacklevel=2, + ) + return np.asarray(self._nom) + + def clip(self, min=None, max=None, out=None, **kwargs): # noqa: A002 + """Numpy clip implementation""" + return self.__class__(self._nom.clip(min, max, out, **kwargs), self._err) + + def fill(self, value) -> None: + """Numpy fill implementation""" + return self._nom.fill(value) + + def put(self, indices, values, mode="raise") -> None: + """Numpy put implementation""" + if isinstance(values, self.__class__): + self._nom.put(indices, values._nom, mode) + self._err.put(indices, values._err, mode) + else: + msg = "Can only 'put' Uncertainties into uncertainties!" 
+ raise TypeError(msg) + + def copy(self): + """Return a copy of the Uncertainty object""" + return Uncertainty(self._nom.copy(), self._err.copy()) + + # Special properties + @property + def flat(self): + """ "numpy flat implementation""" + for u, v in (self._nom.flat, self._err.flat): + yield self.__class__(u, v) + + @property + def shape(self): + """Numpy shape implemenetation""" + return self._nom.shape + + @shape.setter + def shape(self, value): + self._nom.shape = value + self._err.shape = value + + @property + def nbytes(self): + """Numpy nbytes implementation""" + return self._nom.nbytes + self._err.nbytes + + def searchsorted(self, v, side="left", sorter=None): + """numpy searchsorted implementation""" + return self._nom.searchsorted(v, side) + + def __len__(self) -> int: + return len(self._nom) + + def __getitem__(self, key): + try: + return Uncertainty(self._nom[key], self._err[key]) + except TypeError: + msg = f"Index {key} not supported!" + raise TypeError(msg) from None + + def __setitem__(self, key, value): + # If value is nan, just set the value in those regions to nan and return. This is the only case where a scalar can be passed as an argument! + if not isinstance(value, Uncertainty): + if not np.isfinite(value): + self._nom[key] = value + self._err[key] = 0 + return + else: + msg = f"Can only pass Uncertainty type to __setitem__! Instead passed {type(value)}" + raise ValueError(msg) + + try: + _ = self._nom[key] + except ValueError as exc: + msg = f"Object {type(self._nom)} does not support indexing" + raise ValueError(msg) from exc + + if np.size(value._nom) == 1 and np.ndim(value._nom) > 0: + self._nom[key] = value._nom[0] + self._err[key] = value._err[0] + else: + self._nom[key] = value._nom + self._err[key] = value._err + + def tolist(self): + """numpy tolist implementation""" + try: + nom = self._nom.tolist() + err = self._err.tolist() + if not isinstance(nom, list): + return self.__class__(nom, err) + else: + return [ + ( + self.__class__(n, e).tolist() + if isinstance(n, list) + else self.__class__(n, e) + ) + for n, e in (nom, err) + ] + except AttributeError: + msg = f"{type(self._nom).__name__}' does not support tolist." + raise AttributeError(msg) from None + + @property + def ndim(self): + """numpy ndim implementation""" + return np.ndim(self._nom) + + def view(self): + """numpy view implementation""" + return self.__class__(self._nom.view(), self._err.view()) + + def __hash__(self) -> int: + digest = joblib.hash((self._nom, self._err), hash_name="sha1") + return int.from_bytes(bytes(digest, encoding="utf-8"), "big") + + +class ScalarUncertainty(ScalarDisplay, Uncertainty[ST]): + @property + def relative(self): + try: + return self._err / self._nom + except OverflowError: + return np.inf + except ZeroDivisionError: + return np.nan + + def __float__(self): + if ERROR_ON_DOWNCAST: + msg = "The uncertainty is stripped when downcasting to float." + raise DowncastError(msg) + else: + warnings.warn( + "The uncertainty is stripped when downcasting to float.", + DowncastWarning, + stacklevel=2, + ) + + return float(self._nom) + + def __int__(self): + if ERROR_ON_DOWNCAST: + msg = "The uncertainty is stripped when downcasting to float." + raise DowncastError(msg) + else: + warnings.warn( + "The uncertainty is stripped when downcasting to float.", + DowncastWarning, + stacklevel=2, + ) + return int(self._nom) + + def __complex__(self): + if ERROR_ON_DOWNCAST: + msg = "The uncertainty is stripped when downcasting to float." 
+ raise DowncastError(msg) + else: + warnings.warn( + "The uncertainty is stripped when downcasting to float.", + DowncastWarning, + stacklevel=2, + ) + return complex(self._nom) + + def __round__(self, ndigits): + return self.__class__(round(self._nom, ndigits=ndigits), self._err) + + @property + def rel2(self): + try: + return self.relative**2 + except OverflowError: + return np.inf + + def __ne__(self, other): + out = self.__eq__(other) + return not out + + def __eq__(self, other): + if isinstance(other, Uncertainty): + try: + ret = math.isclose(self._nom, other._nom, rel_tol=COMPARE_RTOL) + except TypeError: + ret = self._nom == other._nom + else: + try: + ret = math.isclose(self._nom, other, rel_tol=COMPARE_RTOL) + except TypeError: + ret = self._nom == other + return ret + + def __hash__(self) -> int: + return hash((self._nom, self._err)) diff --git a/auto_uncertainties/util.py b/auto_uncertainties/util.py index 619798f..6c09cd0 100644 --- a/auto_uncertainties/util.py +++ b/auto_uncertainties/util.py @@ -1,15 +1,14 @@ -# -*- coding: utf-8 -*- from __future__ import annotations -import warnings from functools import wraps -from typing import TypeGuard, TypeVar +from typing import TypeVar +import warnings -import numpy as np from jax import Array -from numpy.typing import ArrayLike, NDArray +import numpy as np +from numpy.typing import NDArray -from . import NumpyDowncastWarning +from . import DowncastWarning T = TypeVar("T", bound=np.generic, covariant=True) @@ -32,8 +31,7 @@ def ignore_runtime_warnings(f): def runtime_warn_inner(*args, **kwargs): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", category=RuntimeWarning) - response = f(*args, **kwargs) - return response + return f(*args, **kwargs) return runtime_warn_inner @@ -55,9 +53,8 @@ def ignore_numpy_downcast_warnings(f): @wraps(f) def user_warn_inner(*args, **kwargs): with warnings.catch_warnings(record=True): - warnings.simplefilter("ignore", category=NumpyDowncastWarning) - response = f(*args, **kwargs) - return response + warnings.simplefilter("ignore", category=DowncastWarning) + return f(*args, **kwargs) return user_warn_inner @@ -78,104 +75,9 @@ def has_length(y): return True -def is_np_duck_array(cls: ArrayLike) -> TypeGuard[ArrayLike]: - """Check if object is a numpy array-like, but not a Uncertainty - - Parameters - ---------- - cls : class - - Returns - ------- - bool - """ - try: - import numpy as np - except ImportError: - return False - - return issubclass(cls, np.ndarray) or ( - not hasattr(cls, "_nom") - and not hasattr(cls, "_err") - and hasattr(cls, "__array_function__") - and hasattr(cls, "ndim") - and hasattr(cls, "dtype") - ) - - -class Display(object): - default_format: str = "" - - def _repr_html_(self): - val_ = self._nom - err_ = self._err - if is_np_duck_array(type(self._nom)): - header = "" - footer = "
" - val = f"Magnitude
{val_}
" - err = f"Error
{err_}
" - return header + val + err + footer - else: - val = f"{val_}" - err = f"{err_}" - return f"{val} {chr(0x00B1)} {err}" - - def _repr_latex_(self): - val_ = self._nom - err_ = self._err - if is_np_duck_array(type(self._nom)): - s = ( - ", ".join( - [ - f"{v} \\pm {e}" - for v, e in zip(val_.ravel(), err_.ravel()) - ] - ) - + "~" - ) - header = "$" - footer = "$" - return header + s + footer - else: - val = f"{val_}" - err = f"{err_}" - return f"${val} \\pm {err}$" - - def __str__(self) -> str: - val_ = self._nom - err_ = self._err - - if self._nom is not None: - if self._err is not None: - if is_np_duck_array(type(self._nom)): - return ( - "[" - + ", ".join( - [ - f"{v} +/- {e}" - for v, e in zip(val_.ravel(), err_.ravel()) - ] - ) - + "]" - ) - else: - return f"{val_} +/- {err_}" - else: - return f"{val_}" - - def __format__(self, fmt): - val_ = self._nom - err_ = self._err - str_rep = f"{val_:{fmt}} +/- {err_:{fmt}}" - return str_rep - - def __repr__(self) -> str: - return str(self) - - def ndarray_to_scalar(value: NDArray[T]) -> T: return np.ndarray.item(strip_device_array(value)) -def strip_device_array(value: Array | NDArray) -> NDArray: +def strip_device_array(value: Array | NDArray | float) -> NDArray: return np.array(value) diff --git a/docs/conf.py b/docs/conf.py index 4767af6..884e8ed 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from __future__ import annotations import datetime @@ -37,6 +36,7 @@ "sphinx.ext.viewcode", "sphinx.ext.mathjax", "sphinx_rtd_theme", + "sphinx.ext.napoleon", "sphinx_autodoc_typehints", "sphinx_copybutton", ] @@ -67,11 +67,13 @@ add_function_parentheses = False # -- Options for extensions ---------------------------------------------------- # napoleon - +typehints_fully_qualified = False typehints_defaults = "comma" typehints_use_rtype = True typehints_document_rtype = True always_document_param_types = True +typehints_use_signature = True +typehints_use_signature_return = True # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for diff --git a/docs/pandas_integration.rst b/docs/pandas_integration.rst index e69de29..d33b13d 100644 --- a/docs/pandas_integration.rst +++ b/docs/pandas_integration.rst @@ -0,0 +1,4 @@ +Pandas Integration +================ + +Still WIP diff --git a/docs/source/auto_uncertainties.numpy.rst b/docs/source/auto_uncertainties.numpy.rst new file mode 100644 index 0000000..d258b79 --- /dev/null +++ b/docs/source/auto_uncertainties.numpy.rst @@ -0,0 +1,21 @@ +auto\_uncertainties.numpy package +================================= + +Submodules +---------- + +auto\_uncertainties.numpy.numpy\_wrappers module +------------------------------------------------ + +.. automodule:: auto_uncertainties.numpy.numpy_wrappers + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: auto_uncertainties.numpy + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/auto_uncertainties.pandas.rst b/docs/source/auto_uncertainties.pandas.rst new file mode 100644 index 0000000..aaf1db0 --- /dev/null +++ b/docs/source/auto_uncertainties.pandas.rst @@ -0,0 +1,29 @@ +auto\_uncertainties.pandas package +================================== + +Submodules +---------- + +auto\_uncertainties.pandas.unc\_array module +-------------------------------------------- + +.. 
automodule:: auto_uncertainties.pandas.unc_array
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+auto\_uncertainties.pandas.unc\_dtype module
+--------------------------------------------
+
+.. automodule:: auto_uncertainties.pandas.unc_dtype
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: auto_uncertainties.pandas
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/auto_uncertainties.rst b/docs/source/auto_uncertainties.rst
index 10a5a77..33a889d 100644
--- a/docs/source/auto_uncertainties.rst
+++ b/docs/source/auto_uncertainties.rst
@@ -1,21 +1,31 @@
 auto\_uncertainties package
 ===========================
 
+Subpackages
+-----------
+
+.. toctree::
+   :maxdepth: 4
+
+   auto_uncertainties.numpy
+   auto_uncertainties.pandas
+   auto_uncertainties.uncertainty
+
 Submodules
 ----------
 
-auto\_uncertainties.pandas\_compat module
------------------------------------------
+auto\_uncertainties.display\_format module
+------------------------------------------
 
-.. automodule:: auto_uncertainties.pandas_compat
+.. automodule:: auto_uncertainties.display_format
    :members:
    :undoc-members:
    :show-inheritance:
 
-auto\_uncertainties.uncertainty module
---------------------------------------
+auto\_uncertainties.exceptions module
+-------------------------------------
 
-.. automodule:: auto_uncertainties.uncertainty
+.. automodule:: auto_uncertainties.exceptions
    :members:
    :undoc-members:
    :show-inheritance:
@@ -28,14 +38,6 @@ auto\_uncertainties.util module
    :undoc-members:
    :show-inheritance:
 
-auto\_uncertainties.wrap\_numpy module
---------------------------------------
-
-.. automodule:: auto_uncertainties.wrap_numpy
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
 Module contents
 ---------------
 
diff --git a/docs/source/auto_uncertainties.uncertainty.rst b/docs/source/auto_uncertainties.uncertainty.rst
new file mode 100644
index 0000000..057b64a
--- /dev/null
+++ b/docs/source/auto_uncertainties.uncertainty.rst
@@ -0,0 +1,10 @@
+auto\_uncertainties.uncertainty package
+=======================================
+
+Module contents
+---------------
+
+.. automodule:: auto_uncertainties.uncertainty
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/hooks/fix_line_endings.py b/hooks/fix_line_endings.py
new file mode 100755
index 0000000..22b2dc3
--- /dev/null
+++ b/hooks/fix_line_endings.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+import sys
+
+
+def main(source_path: list[str]) -> bool:
+    """
+    Convert CR and CRLF line endings to LF in the given files.
+
+    Parameters
+    ----------
+    source_path : list[str]
+        Paths of the files to check and, if necessary, rewrite.
+
+    Returns
+    -------
+    bool
+        True if any file contained CR/CRLF endings and was rewritten.
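+
+    Examples
+    --------
+    A hypothetical invocation (the path is illustrative):
+
+    >>> main(source_path=["README.md"])
+    False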
+    """
+
+    changed = False
+    for file_path in source_path:
+        # Open with newline="" so universal-newline translation does not
+        # hide CR characters from the check below.
+        with Path(file_path).open("r", encoding="utf-8", newline="") as file_obj:
+            file_text = file_obj.read()
+
+        if "\r" in file_text:
+            # Normalize CRLF first, then any remaining lone CR.
+            file_text = file_text.replace("\r\n", "\n").replace("\r", "\n")
+            with Path(file_path).open("w", encoding="utf-8", newline="") as file_obj:
+                file_obj.write(file_text)
+            changed = True
+
+    return changed
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="CR/CRLF -> LF converter.")
+
+    parser.add_argument("paths", nargs="*", help="Source paths of files to check.")
+
+    args = parser.parse_args()
+
+    sys.exit(main(source_path=args.paths))
diff --git a/hooks/generate_init.py b/hooks/generate_init.py
new file mode 100755
index 0000000..fdc605c
--- /dev/null
+++ b/hooks/generate_init.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+from __future__ import annotations

+import subprocess
+import sys
+
+from mkinit import static_mkinit
+
+
+def make_init():
+    options = {
+        "with_attrs": True,
+        "with_mods": True,
+        "with_all": True,
+        "relative": True,
+        "lazy_import": False,
+        "lazy_loader": True,
+        "lazy_loader_typed": True,
+        "lazy_boilerplate": None,
+        "use_black": False,
+    }
+
+    # Regenerate the __init__.py files for the whole package tree.
+    static_mkinit.autogen_init(
+        "auto_uncertainties",
+        respect_all=True,
+        options=options,
+        dry=False,
+        diff=False,
+        recursive=True,
+    )
+    # Reformat the generated files so they pass the ruff-format check.
+    subprocess.run(["ruff", "format"])
+
+
+if __name__ == "__main__":
+    make_init()
+
+    # Exit nonzero if mkinit modified tracked files or created untracked
+    # ones, so that pre-commit reports a failure and the changes get staged.
+    changed_files1 = subprocess.run(
+        ["git", "diff", "--name-only", "--diff-filter=ACM", "--exit-code"]
+    )
+    changed_files2 = subprocess.run(
+        ["git", "ls-files", "--exclude-standard", "--others"], capture_output=True
+    )
+    retcode = changed_files1.returncode + changed_files2.returncode
+    retcode += len(changed_files2.stderr)
+    retcode += len(changed_files2.stdout)
+
+    sys.exit(retcode)
diff --git a/paper/paper.md b/paper/paper.md
new file mode 100644
index 0000000..319f11c
--- /dev/null
+++ b/paper/paper.md
@@ -0,0 +1,120 @@
+---
+title: 'AutoUncertainties: A Python package for uncertainty propagation'
+tags:
+  - Python
+  - uncertainty propagation
+authors:
+  - name: Varchas Gopalaswamy
+    orcid: 0000-0002-8013-9314
+    equal-contrib: true
+    affiliation: "1" # (Multiple affiliations must be quoted)
+affiliations:
+ - name: Laboratory for Laser Energetics, Rochester, USA
+   index: 1
+date: 3 April 2024
+
+---
+
+# Summary
+
+Propagation of uncertainties is of great utility in the experimental sciences.
+While the rules of (linear) uncertainty propagation
+are simple, managing many variables with uncertainty information
+can quickly become complicated in large scientific software stacks: it requires
+careful bookkeeping and custom error propagation
+rules for each mathematical operator. The Python package `AutoUncertainties`,
+described here, provides a solution to this problem.
+
+# Statement of need
+
+`AutoUncertainties` is a Python package for uncertainty propagation. It provides
+a drop-in mechanism to add uncertainty information to Python scalar and `numpy`
+array variables. It implements manual propagation rules for the Python dunder math
+methods, and uses automatic differentiation via `JAX` to propagate uncertainties
+for most numpy methods applied to both scalar and numpy array variables, as the
+short sketch below illustrates.
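+For example, a minimal sketch (the numbers here are purely illustrative):
+
+```python
+import numpy as np
+
+from auto_uncertainties import Uncertainty
+
+# Attach an error bar to a central value...
+u = Uncertainty(10.0, 1.0)
+# ...then use it like a plain number.
+print(u + 5.0)    # manual rule for __add__
+print(np.cos(u))  # propagated via automatic differentiation
+```
+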
+In doing so,
+it eliminates the need to carry around additional uncertainty variables or to
+implement custom propagation rules for any numpy operator with a gradient
+rule implemented by `jax`; in most cases, existing code needs minimal modification,
+typically only where uncertainties are attached to central values.
+
+# Prior Work
+
+To the author's knowledge, the only existing error propagation library in Python is the `uncertainties` package,
+which inspired the current work. While extremely useful, the `uncertainties` package
+relies on hand-implemented rules and functions for uncertainty propagation of array and scalar data. While
+this is transparent for the intrinsic dunder methods such as `__add__`, it becomes problematic for advanced
+mathematical operators. For instance, calculating the uncertainty
+propagation due to the cosine requires importing a separate math module
+
+```python
+import numpy as np
+from uncertainties import unumpy, ufloat
+
+arr = np.array([ufloat(1, 0.1), ufloat(2, 0.002)])
+unumpy.cos(arr)
+```
+
+rather than being able to use `numpy` directly
+
+```python
+import numpy as np
+from uncertainties import ufloat
+
+arr = np.array([ufloat(1, 0.1), ufloat(2, 0.002)])
+np.cos(arr)
+```
+
+# Implementation
+
+Linear uncertainty propagation of a function $f(x) : \mathbb{R}^n \rightarrow \mathbb{R}^m$ can be computed
+via the simple rule (for independent variables) $$ \delta f_j(x)^2 = \sum_i \left( \dfrac{\partial f_j}{\partial x_i}(x)\, \delta x_i \right)^2 $$
+
+To compute $\dfrac{\partial f_j}{\partial x_i}$ for arbitrary $f$, the implementation in `AutoUncertainties` relies on automatic
+differentiation provided by `jax`. Calls to any `numpy` array function or ufunc are intercepted via the `__array_function__`
+and `__array_ufunc__` mechanisms, and dispatched to a numpy wrapper routine that computes the Jacobian matrix via `jax.jacfwd`.
+
+The user API for the `Uncertainty` object exposes only a small set of properties and methods:
+
+- `value -> float`: The central value of the object
+- `error -> float`: The error of the object
+- `relative -> float`: The relative error (i.e. error / value) of the object
+- `plus_minus(self, err: float) -> Uncertainty`: Adds error (in quadrature)
+- `from_sequence(self, seq: List[ScalarUncertainty]) -> VectorUncertainty`: Constructs an array `Uncertainty` object from a list of scalar `Uncertainty` objects
+
+To extract errors/central values from arbitrary objects, the accessors `nominal_values` and `std_devs` are provided. These
+functions return:
+
+- The central values and errors, respectively, if the input is an `Uncertainty` object
+- The input itself and zero, respectively, if the input is any other kind of object
+
+`Uncertainty` objects are displayed using rounding rules based on the uncertainty:
+
+- The error is rounded to two significant digits
+- The central value is rounded to the first significant digit of the error, or to two significant figures, whichever yields more digits
+
+This behavior can be toggled using `set_display_rounding`:
+
+```python
+from auto_uncertainties import set_display_rounding
+
+set_display_rounding(False)
+```
+
+Calling `__array__`, whether via `np.array` or any other method, will by default raise a `DowncastError`, as the sketch below illustrates.
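+For instance, a minimal sketch of the default behavior (the values are purely illustrative):
+
+```python
+import numpy as np
+
+from auto_uncertainties import Uncertainty
+
+u = Uncertainty(np.array([1.0, 2.0]), np.array([0.1, 0.2]))
+# Silently discarding the error information is not allowed by default:
+np.array(u)  # raises DowncastError
+```
+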
This can be disabled so that a warning is issued instead +and the `Uncertainty` object is converted to an equivalent array of its nominal values via `set_downcast_error` + +```python +from auto_uncertainties import set_downcast_error +set_downcast_error(False) +``` + +## Pandas + +Support for `pandas` via the ExtensionArray mechanism is largely functional. + + + + +# Acknowledgements + +This material is based upon work supported by the Department of Energy [National Nuclear Security Administration] University of Rochester “National Inertial Confinement Fusion Program” under Award Number(s) DE-NA0004144, and Department of Energy [Office of Fusion Energy Sciences] University of Rochester “Applications of Machine Learning and Data Science to predict, design and improve laser-fusion implosions for inertial fusion energy” under Award Number(s) DE-SC0024381. + +This report was prepared as an account of work sponsored by an agency of the United States Government. Neither the United States Government nor any agency thereof, nor any of their employees, makes any warranty, express or implied, or assumes any legal liability or responsibility for the accuracy, completeness, or usefulness of any information, apparatus, product, or process disclosed, or represents that its use would not infringe privately owned rights. Reference herein to any specific commercial product, process, or service by trade name, trademark, manufacturer, or otherwise does not necessarily constitute or imply its endorsement, recommendation, or favoring by the United States Government or any agency thereof. The views and opinions of authors expressed herein do not necessarily state or reflect those of the United States Government or any agency thereof. diff --git a/pyproject.toml b/pyproject.toml index 75f1bfb..920692c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ authors = [ ] description = "Linear Uncertainty Propagation with Auto-Differentiation" readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.11" license = {text = "MIT"} classifiers = [ "Programming Language :: Python :: 3", @@ -16,7 +16,7 @@ dependencies = [ "jaxlib >= 0.3.14", "joblib >= 1.2.0", "loguru >= 0.6.0", - "typing_extensions >= 4.7.0", + "lazy_loader==0.4", ] dynamic = ["version"] @@ -31,63 +31,8 @@ build-backend = "setuptools.build_meta" [tool.setuptools_scm] write_to = "auto_uncertainties/_version.py" -[tool.ruff] -line-length = 79 -target-version = "py311" -ignore = ["E501"] - -[tool.ruff.isort] -force-sort-within-sections = true -required-imports = ["from __future__ import annotations"] - - -[tool.black] -line-length = 79 -include = '\.pyi?$' -exclude = ''' -/( - \.git - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | _build - | buck-out - | build - | dist -)/ -''' - - -[tool.isort] -profile = "black" -line_length = 79 -force_alphabetical_sort_within_sections = true -add_imports = ["from __future__ import annotations"] - -[tool.pylint] -#Enforced by black -max-line-length = 1000 - -disable = """ -line-too-long, -missing-module-docstring, -missing-function-docstring, -broad-exception-caught, -too-many-branches, -invalid-name, -protected-access, -import-outside-toplevel, -wrong-import-position, -missing-class-docstring, -too-many-locals, -redefined-builtin, -too-few-public-methods, -global-statement, -no-member, -no-else-return -""" -ignore = "tests" +[tool.setuptools.packages.find] +exclude = ["tests/*"] [tool.pyright] pythonVersion = "3.11" diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 
0000000..815785a --- /dev/null +++ b/ruff.toml @@ -0,0 +1,75 @@ + +target-version = "py311" + +[lint] +select = [ + "I", + "W291", + "W292", + "T100", + "YTT", + "UP009", + "E", + "F", + "UP", + "B", + "SIM", + "A", + "COM", + "C4", + "DTZ", + "EM", + "EXE", + "FA", + "ISC", + "ICN", + "LOG", + "G", + "INP", + "PIE", + "T20", + "PYI", + "PT", + "Q", + "RSE", + "RET", + "SLF", + "TID", + "INT", + "PTH", + "TD", + "PD", + "TRY", + "FLY", + "NPY", + "PERF", + "RUF", + "S", +] + +ignore = ["E501", "ISC002", "ISC001", +"COM819", "COM812", "Q003", "Q002", "Q001", "Q000", +"D300", "D206", "E117", "E114", "E111", "W191", +"B008", "SIM300", "S101", "RET505", "SLF001", "RET506" +] + +[lint.per-file-ignores] +"__init__.py" = ["F401", "E402", "I001", "I002"] +"__init__.pyi" = ["F401", "E402", "I001", "I002"] +"doc/source/conf.py" = ["INP001"] + +[format] +docstring-code-format = true +line-ending = "lf" + +[lint.isort] +from-first = false +known-first-party = ["hibiscus"] +force-sort-within-sections = true +required-imports = ["from __future__ import annotations"] + +[lint.flake8-bugbear] +extend-immutable-calls = ["pylatex.utils.NoEscape"] + +[lint.flake8-self] +ignore-names = [] diff --git a/tests/pandas/conftest.py b/tests/pandas/conftest.py new file mode 100644 index 0000000..9a5cfc8 --- /dev/null +++ b/tests/pandas/conftest.py @@ -0,0 +1,352 @@ +from __future__ import annotations + +import operator + +import numpy as np +from pandas import Series +import pytest + +from auto_uncertainties import Uncertainty, UncertaintyArray, UncertaintyDtype + + +@pytest.fixture() +def dtype(): + """A fixture providing the ExtensionDtype to validate.""" + return UncertaintyDtype(np.float64) + + +@pytest.fixture() +def data(): + """ + Length-100 array for this type. + + * data[0] and data[1] should both be non missing + * data[0] and data[1] should not be equal + """ + + v = np.random.random(size=100) + e = np.random.random(size=100) + + return UncertaintyArray(v, e) + + +@pytest.fixture() +def data_for_twos(dtype): + """ + Length-100 array in which all the elements are two. + + Call pytest.skip in your fixture if the dtype does not support divmod. + """ + if not (dtype._is_numeric or dtype.kind == "m"): + # Object-dtypes may want to allow this, but for the most part + # only numeric and timedelta-like dtypes will need to implement this. + pytest.skip(f"{dtype} is not a numeric dtype") + + v = np.ones(100) * 2 + e = np.random.random(size=100) + return UncertaintyArray(v, e) + + +@pytest.fixture() +def data_missing(): + """Length-2 array with [NA, Valid]""" + return UncertaintyArray([np.nan, 1], [0, 0.1]) + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing + + +@pytest.fixture() +def data_repeated(data): + """ + Generate many datasets. + + Parameters + ---------- + data : fixture implementing `data` + + Returns + ------- + Callable[[int], Generator]: + A callable that takes a `count` argument and + returns a generator yielding `count` datasets. + """ + + def gen(count): + for _ in range(count): + yield data + + return gen + + +@pytest.fixture() +def data_for_sorting(): + """ + Length-3 array with a known sort order. 
+ + This should be three items [B, C, A] with + A < B < C + + For boolean dtypes (for which there are only 2 values available), + set B=C=True + """ + + return UncertaintyArray([1, 2, 0], [0.1, 0.2, 0.3]) + + +@pytest.fixture() +def data_missing_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + """ + return UncertaintyArray([1, np.nan, 0], [0.1, np.nan, 0.3]) + + +@pytest.fixture() +def na_cmp(): + """ + Binary operator for comparing NA values. + + Should return a function of two arguments that returns + True if both arguments are (scalar) NA for your type. + + By default, uses ``operator.is_`` + """ + + def na_compare(x, y): + if isinstance(x, Uncertainty): + xval = x.value + else: + xval = x + if isinstance(y, Uncertainty): + yval = y.value + else: + yval = y + if np.isnan(xval) and np.isnan(yval): + return True + return operator.is_(xval, yval) + + return na_compare + + +@pytest.fixture() +def na_value(dtype): + """ + The scalar missing value for this type. Default dtype.na_value. + + TODO: can be removed in 3.x (see https://github.com/pandas-dev/pandas/pull/54930) + """ + return dtype.na_value + + +@pytest.fixture() +def data_for_grouping(): + """ + Data for factorization, grouping, and unique tests. + + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing. + + If a dtype has _is_boolean = True, i.e. only 2 unique non-NA entries, + then set C=B. + """ + return UncertaintyArray([1, 1, np.nan, np.nan, 0, 0, 1, 2], [0.1] * 8) + + +@pytest.fixture(params=[True, False]) +def box_in_series(request): + """Whether to box the data in a Series""" + return request.param + + +_all_arithmetic_operators = [ + "__add__", + "__radd__", + "__sub__", + "__rsub__", + "__mul__", + "__rmul__", + "__floordiv__", + "__rfloordiv__", + "__truediv__", + "__rtruediv__", + "__pow__", + "__rpow__", + "__mod__", + "__rmod__", +] + + +@pytest.fixture(params=_all_arithmetic_operators) +def all_arithmetic_operators(request): + """ + Fixture for dunder names for common arithmetic operations + """ + return request.param + + +@pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"]) +def all_compare_operators(request): + """ + Fixture for dunder names for common compare operations + + * >= + * > + * == + * != + * < + * <= + """ + return request.param + + +# commented functions aren't implemented in numpy/pandas +_all_numeric_reductions = [ + "sum", + "max", + "min", + "mean", + # "prod", + "std", + "var", + "median", + "sem", + "kurt", + "skew", +] + + +@pytest.fixture(params=_all_numeric_reductions) +def all_numeric_reductions(request): + """ + Fixture for numeric reduction names. + """ + return request.param + + +_all_boolean_reductions = [] + + +@pytest.fixture(params=_all_boolean_reductions) +def all_boolean_reductions(request): + """ + Fixture for boolean reduction names. + """ + return request.param + + +_all_numeric_accumulations = ["cumsum", "cumprod", "cummin", "cummax"] + + +@pytest.fixture(params=_all_numeric_accumulations) +def all_numeric_accumulations(request): + """ + Fixture for numeric accumulation names + """ + return request.param + + +@pytest.fixture( + params=[ + operator.ge, + operator.gt, + operator.le, + operator.lt, + operator.eq, + operator.ne, + ], +) +def comparison_op(request): + """ + Functions to test groupby.apply(). 
+ """ + return request.param + + +@pytest.fixture( + params=[ + lambda x: 1, + lambda x: [1] * len(x), + lambda x: Series([1] * len(x)), + lambda x: x, + ], + ids=["scalar", "list", "series", "object"], +) +def groupby_apply_op(request): + """ + Functions to test groupby.apply(). + """ + return request.param + + +@pytest.fixture(params=[None, lambda x: x]) +def sort_by_key(request): + """ + Simple fixture for testing keys in sorting methods. + Tests None (no key) and the identity key. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_frame(request): + """ + Boolean fixture to support Series and Series.to_frame() comparison testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_series(request): + """ + Boolean fixture to support arr and Series(arr) comparison testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def use_numpy(request): + """ + Boolean fixture to support comparison testing of ExtensionDtype array + and numpy array. + """ + return request.param + + +@pytest.fixture(params=["ffill", "bfill"]) +def fillna_method(request): + """ + Parametrized fixture giving method parameters 'ffill' and 'bfill' for + Series. testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_array(request): + """ + Boolean fixture to support ExtensionDtype _from_sequence method testing. + """ + return request.param + + +@pytest.fixture() +def invalid_scalar(data): + """ + A scalar that *cannot* be held by this ExtensionArray. + + The default should work for most subclasses, but is not guaranteed. + + If the array can hold any item (i.e. object dtype), then use pytest.skip. + """ + return object.__new__(object) diff --git a/tests/pandas/test_pandas_extension.py b/tests/pandas/test_pandas_extension.py new file mode 100644 index 0000000..1f35da4 --- /dev/null +++ b/tests/pandas/test_pandas_extension.py @@ -0,0 +1,140 @@ +from __future__ import annotations + +from typing import final + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension import base +import pytest + +from auto_uncertainties import UncertaintyArray, nominal_values +from auto_uncertainties.uncertainty import set_downcast_error +from auto_uncertainties.uncertainty.uncertainty_containers import ( + set_compare_error, +) + +set_downcast_error(True) +set_compare_error(1e-4) + + +class TestUncertaintyArray(base.ExtensionTests): + def test_setitem_sequence(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + original = data.copy() + + data[[0, 1]] = [data[1], data[0]] + if data[0] != original[1]: + print(data[0], original[1]) + raise AssertionError("Setting with sequence failed") + + def test_invert(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + with pytest.raises(TypeError): + data = ~data + assert data[0] == ~data[0] + + def test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + + result = ser + data + expected = pd.Series(data + data) + tm.assert_series_equal(result, expected) + + def test_contains(self, data, data_missing): + # GH-37867 + # Tests for membership checks. Membership checks for nan-likes is tricky and + # the settled on rule is: `nan_like in arr` is True if nan_like is + # arr.dtype.na_value and arr.isna().any() is True. Else the check returns False. 
+ + na_value = data.dtype.na_value + # ensure data without missing values + data = data[~data.isna()] + + # first elements are non-missing + assert data_missing[0] in data_missing + assert data[0] in data + + # check the presence of na_value + assert na_value in data_missing + assert na_value not in data + + # the data can never contain other nan-likes than na_value + for na_value_obj in tm.NULL_OBJECTS: + if na_value_obj is na_value or type(na_value_obj) == type(na_value): + # type check for e.g. two instances of Decimal("NAN") + continue + assert na_value_obj not in data + assert na_value_obj not in data_missing + + def _compare_other(self, ser: pd.Series, data, op, other): + if op.__name__ in ["eq", "ne"]: + # comparison should match point-wise comparisons + result = op(ser, other) + expected = ser.combine(other, op) + expected = self._cast_pointwise_result(op.__name__, ser, other, expected) + tm.assert_series_equal(result, expected) + + else: + result = op(ser, other) + + # Didn't error, then should match pointwise behavior + expected = ser.combine(other, op) + expected = self._cast_pointwise_result(op.__name__, ser, other, expected) + tm.assert_series_equal(result, expected) + + def test_compare_scalar(self, data, comparison_op): + ser = pd.Series(data) + self._compare_other(ser, data, comparison_op, 0) + + def test_compare_array(self, data, comparison_op): + ser = pd.Series(data) + other = pd.Series([data[0]] * len(data), dtype=data.dtype) + self._compare_other(ser, data, comparison_op, other) + + @final + def check_opname(self, ser: pd.Series, op_name: str, other): + op = self.get_op_from_name(op_name) + + self._check_op(ser, op, other, op_name, None) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + # ndarray & other series + op_name = all_arithmetic_operators + ser = pd.Series(data) + self.check_opname( + ser, op_name, pd.Series([ser.iloc[0]] * len(ser), dtype=ser.dtype) + ) + + def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: + # Specify if we expect this reduction to succeed. + return True + + def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): + # We perform the same operation on the np.float64 data and check + # that the results match. Override if you need to cast to something + # other than float64. + if op_name in UncertaintyArray._supported_reductions: + res_op = getattr(ser, op_name) + else: + with pytest.raises(TypeError): + res_op = getattr(ser, op_name) + + try: + alt = ser.astype("float64") + except (TypeError, ValueError): + # e.g. 
Interval can't cast (TypeError), StringArray can't cast + # (ValueError), so let's cast to object and do + # the reduction pointwise + alt = ser.astype(object) + + exp_op = getattr(alt, op_name) + if op_name == "count": + result = res_op() + expected = exp_op() + else: + result = res_op(skipna=skipna) + expected = exp_op(skipna=skipna) + + tm.assert_almost_equal(nominal_values(result), nominal_values(expected)) diff --git a/tests/test_numpy_wrap.py b/tests/test_numpy_wrap.py index 658c833..2bf8848 100644 --- a/tests/test_numpy_wrap.py +++ b/tests/test_numpy_wrap.py @@ -1,12 +1,11 @@ -# -*- coding: utf-8 -*- from __future__ import annotations import warnings -import hypothesis.strategies as st -import numpy as np from hypothesis import given, settings from hypothesis.extra import numpy as hnp +import hypothesis.strategies as st +import numpy as np import auto_uncertainties @@ -40,7 +39,7 @@ def op_test(op, *args, **kwargs): units = None w_ = op(*without_unc, **kwargs) except TypeError: - return + return None w = op(*with_unc, **kwargs) if units is not None and hasattr(w_, "units"): assert w.units == w_.units @@ -83,7 +82,7 @@ def inner(func): return inner -@given_float_3d(auto_uncertainties.wrap_numpy.bcast_same_shape_ufuncs) +@given_float_3d(auto_uncertainties.numpy.numpy_wrappers.bcast_same_shape_ufuncs) @settings(deadline=3000) def test_same_shape(unom, uerr, units, op): vnom = unom / 2 @@ -95,13 +94,13 @@ def test_same_shape(unom, uerr, units, op): u *= units v *= units oper = getattr(np, op) - if op in auto_uncertainties.wrap_numpy.unary_bcast_same_shape_ufuncs: + if op in auto_uncertainties.numpy.numpy_wrappers.unary_bcast_same_shape_ufuncs: op_test(oper, u, units=units) else: op_test(oper, u, v, units=units) -@given_float_3d(auto_uncertainties.wrap_numpy.bcast_same_shape_bool_ufuncs) +@given_float_3d(auto_uncertainties.numpy.numpy_wrappers.bcast_same_shape_bool_ufuncs) @settings(deadline=3000) def test_same_shape_bool(unom, uerr, units, op): vnom = unom / 2 @@ -112,14 +111,14 @@ def test_same_shape_bool(unom, uerr, units, op): u *= units v *= units oper = getattr(np, op) - if op in auto_uncertainties.wrap_numpy.unary_bcast_same_shape_bool_ufuncs: + if op in auto_uncertainties.numpy.numpy_wrappers.unary_bcast_same_shape_bool_ufuncs: w = oper(u) else: w = oper(u, v) assert w.dtype == bool -@given_float_3d(auto_uncertainties.wrap_numpy.bcast_nograd_ufuncs) +@given_float_3d(auto_uncertainties.numpy.numpy_wrappers.bcast_nograd_ufuncs) @settings(deadline=3000) def test_nograd(unom, uerr, units, op): vnom = unom / 2 @@ -138,7 +137,7 @@ def test_nograd(unom, uerr, units, op): assert not isinstance(w, auto_uncertainties.Uncertainty) -@given_float_3d(auto_uncertainties.wrap_numpy.bcast_apply_to_both_ufuncs) +@given_float_3d(auto_uncertainties.numpy.numpy_wrappers.bcast_apply_to_both_ufuncs) @settings(deadline=3000) def test_apply_to_both(unom, uerr, units, op): vnom = unom / 2 @@ -152,7 +151,7 @@ def test_apply_to_both(unom, uerr, units, op): op_test(oper, u, units=units) -@given_float_3d(auto_uncertainties.wrap_numpy.bcast_reduction_unary) +@given_float_3d(auto_uncertainties.numpy.numpy_wrappers.bcast_reduction_unary) @settings(deadline=3000) def test_unary_reduction(unom, uerr, units, op): vnom = unom / 2 diff --git a/tests/test_pandas_extension.py b/tests/test_pandas_extension.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_pandas_interface.py b/tests/test_pandas_interface.py deleted file mode 100644 index 8bd254a..0000000 --- a/tests/test_pandas_interface.py +++ 
/dev/null @@ -1,100 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import annotations - -import operator - -import numpy as np -import pandas as pd -import pytest -from pandas.tests.extension.conftest import as_array # noqa: F401 -from pandas.tests.extension.conftest import as_frame # noqa: F401 -from pandas.tests.extension.conftest import as_series # noqa: F401 -from pandas.tests.extension.conftest import fillna_method # noqa: F401 -from pandas.tests.extension.conftest import groupby_apply_op # noqa: F401 -from pandas.tests.extension.conftest import use_numpy # noqa: F401 - -from auto_uncertainties.pandas_compat import UncertaintyArray - - -@pytest.fixture -def data(): - return UncertaintyArray( - np.random.random(size=100), np.abs(np.random.random(size=100)) - ) - - -class TestUserInterface(object): - def test_get_underlying_data(self, data: UncertaintyArray): - ser = pd.Series(data) - # this first test creates an array of bool (which is desired, eg for indexing) - assert all(ser.values == data) - assert ser.values[23] == data[23] - - def test_arithmetic(self, data: UncertaintyArray): - ser = pd.Series(data) - ser2 = ser + ser - assert all(ser2.values == 2 * data) - - def test_initialisation(self, data: UncertaintyArray): - # fails with plain array - # works with UncertaintyArray - df = pd.DataFrame( - { - "x1": pd.Series(data.uncertainty, dtype="Uncertainty[float]"), - "x2": pd.Series(data.uncertainty.value, dtype="float"), - "x3": pd.Series( - data.uncertainty.value, - data.uncertainty.error, - dtype="uncertainty[float32]", - ), - } - ) - - for col in df.columns: - assert all(df[col] == df.length) - - def test_df_operations(self): - # simply a copy of what's in the notebook - df = pd.DataFrame( - { - "torque": pd.Series( - [1.0, 2.0, 2.0, 3.0], dtype="Uncertainty[float]" - ), - "angular_velocity": UncertaintyArray( - [1.0, 2.0, 2.0, 3.0], - [1.0, 2.0, 2.0, 3.0], - dtype="Uncertainty[float]", - ), - } - ) - - df["power"] = df["torque"] * df["angular_velocity"] - - df.power.values - df.power.values.value - df.power.values.error - - df.angular_velocity.values - - -arithmetic_ops = [ - operator.add, - operator.sub, - operator.mul, - operator.truediv, - operator.floordiv, - operator.pow, -] - -comparative_ops = [ - operator.eq, - operator.le, - operator.lt, - operator.ge, - operator.gt, -] - -unit_ops = [ - operator.mul, - operator.truediv, -] diff --git a/tests/test_uncertainty.py b/tests/test_uncertainty.py index f294220..ee87089 100644 --- a/tests/test_uncertainty.py +++ b/tests/test_uncertainty.py @@ -1,16 +1,20 @@ -# -*- coding: utf-8 -*- from __future__ import annotations +import math import operator import warnings +from hypothesis import given +from hypothesis.extra import numpy as hnp import hypothesis.strategies as st import numpy as np import pytest -from hypothesis import given -from hypothesis.extra import numpy as hnp -from auto_uncertainties import NegativeStdDevError, Uncertainty +from auto_uncertainties import ( + NegativeStdDevError, + ScalarUncertainty, + Uncertainty, +) try: from pint import DimensionalityError @@ -48,50 +52,71 @@ def check_units_and_mag(unc, units, mag, err): assert unc.error.to(units).m == err +general_float_strategy = dict( + allow_nan=False, + allow_infinity=False, + allow_subnormal=False, + min_value=-1e3, + max_value=1e3, +) + + @pytest.mark.filterwarnings("ignore::RuntimeWarning") @given( - v=st.floats(allow_subnormal=False), - e=st.floats(allow_subnormal=False), + v=st.floats(), + e=st.floats(), units=st.sampled_from(UNITS), + 
call_super=st.sampled_from([True, False]), ) -def test_scalar_creation(v, e, units): - if e < 0: - if np.isfinite(e): +def test_scalar_creation(v, e, units, call_super): + if call_super: + const = Uncertainty + else: + const = ScalarUncertainty + + if not np.isfinite(v): + u = const(v, e) + assert isinstance(u, float) + assert not np.isfinite(v) + elif not np.isfinite(e): + u = const(v, e) + assert u.error == 0 + elif e < 0: + with pytest.raises(NegativeStdDevError): + u = const(v, e) + if units is not None: with pytest.raises(NegativeStdDevError): - u = Uncertainty(v, e) - if units is not None: - with pytest.raises(NegativeStdDevError): - u = Uncertainty.from_quantities(v * units, e * units) + u = const.from_quantities(v * units, e * units) else: - u = Uncertainty(v, e) + u = const(v, e) if np.isfinite(v) and np.isfinite(e): - assert u.value == v - assert u.error == e + assert math.isclose(u.value, v) + assert math.isclose(u.error, e) if v > 0: - assert u.relative == e / v + assert math.isclose(u.relative, e / v) elif v == 0: assert not np.isfinite(u.relative) if units is not None: with pytest.raises(NotImplementedError): - u = Uncertainty(v * units, e * units) + u = const(v * units, e * units) - u = Uncertainty.from_quantities(v * units, e * units) + u = const.from_quantities(v * units, e * units) check_units_and_mag(u, units, v, e) - u = Uncertainty(v, e) * units + u = const(v, e) * units check_units_and_mag(u, units, v, e) with pytest.raises(DimensionalityError): - u = Uncertainty.from_quantities(v * units, e) + u = const.from_quantities(v * units, e) with pytest.raises(DimensionalityError): - u = Uncertainty.from_quantities(v, e * units) + u = const.from_quantities(v, e * units) @pytest.mark.filterwarnings("ignore::RuntimeWarning") @given( - v1=st.floats(allow_subnormal=False), - e1=st.floats(min_value=0, max_value=1e3, allow_subnormal=False), + v1=st.floats(**general_float_strategy), + e1=st.floats(min_value=0, max_value=1e3), op=st.sampled_from(UNARY_OPS), ) def test_scalar_unary(v1, e1, op): @@ -100,7 +125,7 @@ def test_scalar_unary(v1, e1, op): u = op(u1) if np.isfinite(v1): if isinstance(u, Uncertainty): - assert u.value == op(u1.value) + assert math.isclose(u.value, op(u1.value)) if np.isfinite(e1): assert np.isfinite(u.error) else: @@ -109,10 +134,16 @@ def test_scalar_unary(v1, e1, op): @pytest.mark.filterwarnings("ignore::RuntimeWarning") @given( - v1=st.floats(allow_subnormal=False), - v2=st.floats(allow_subnormal=False), - e1=st.floats(min_value=0, max_value=1e3, allow_subnormal=False), - e2=st.floats(min_value=0, max_value=1e3, allow_subnormal=False), + v1=st.floats(**general_float_strategy), + v2=st.floats(**general_float_strategy), + e1=st.floats( + min_value=0, + max_value=1e3, + ), + e2=st.floats( + min_value=0, + max_value=1e3, + ), op=st.sampled_from(BINARY_OPS), ) def test_scalar_binary(v1, e1, v2, e2, op): @@ -121,19 +152,25 @@ def test_scalar_binary(v1, e1, v2, e2, op): u = op(u1, u2) if isinstance(u, Uncertainty): - assert u.value == op(u1.value, u2.value) + assert math.isclose(u.value, op(u1.value, u2.value)) if np.isfinite(e1) and np.isfinite(e2): assert np.isfinite(u.error) else: - assert u == op(u1.value, u2.value) + assert math.isclose(u, op(u1.value, u2.value)) @pytest.mark.filterwarnings("ignore::RuntimeWarning") @given( - v1=st.floats(allow_subnormal=False), - v2=st.floats(allow_subnormal=False), - e1=st.floats(min_value=0, max_value=1e3, allow_subnormal=False), - e2=st.floats(min_value=0, max_value=1e3, allow_subnormal=False), + 
v1=st.floats(**general_float_strategy),
+    v2=st.floats(**general_float_strategy),
+    e1=st.floats(
+        min_value=0,
+        max_value=1e3,
+    ),
+    e2=st.floats(
+        min_value=0,
+        max_value=1e3,
+    ),
     op=st.sampled_from([operator.add, operator.sub]),
 )
 def test_scalar_add_sub(v1, e1, v2, e2, op):
@@ -142,17 +179,29 @@
     u = op(u1, u2)
 
     if np.isfinite(v1) and np.isfinite(v2):
-        assert u.value == op(u1.value, u2.value)
+        assert math.isclose(u.value, op(u1.value, u2.value))
         if np.isfinite(e1) and np.isfinite(e2):
-            assert u.error == np.sqrt(u1.error**2 + u2.error**2)
+            assert math.isclose(u.error, np.sqrt(u1.error**2 + u2.error**2))
 
 
 @pytest.mark.filterwarnings("ignore::RuntimeWarning")
 @given(
-    v1=st.floats(min_value=1, max_value=1e3, allow_subnormal=False),
-    v2=st.floats(min_value=1, max_value=1e3, allow_subnormal=False),
-    e1=st.floats(min_value=0, max_value=1e3, allow_subnormal=False),
-    e2=st.floats(min_value=0, max_value=1e3, allow_subnormal=False),
+    v1=st.floats(
+        min_value=1,
+        max_value=1e3,
+    ),
+    v2=st.floats(
+        min_value=1,
+        max_value=1e3,
+    ),
+    e1=st.floats(
+        min_value=0,
+        max_value=1e3,
+    ),
+    e2=st.floats(
+        min_value=0,
+        max_value=1e3,
+    ),
     op=st.sampled_from([operator.mul, operator.truediv]),
 )
 def test_scalar_mul_div(v1, e1, v2, e2, op):
@@ -161,7 +210,7 @@ def test_scalar_mul_div(v1, e1, v2, e2, op):
     u = op(u1, u2)
 
     if np.isfinite(v1) and np.isfinite(v2):
-        assert u.value == op(u1.value, u2.value)
+        assert math.isclose(u.value, op(u1.value, u2.value))
         if np.isfinite(e1) and np.isfinite(e2):
             np.testing.assert_almost_equal(
                 u.error, u.value * np.sqrt(u1.rel**2 + u2.rel**2)
@@ -170,7 +219,10 @@
-    v1=st.floats(min_value=0, max_value=1e3, allow_subnormal=False),
+    v1=st.floats(
+        min_value=0,
+        max_value=1e3,
+    ),
     v2=hnp.arrays(
         dtype=st.sampled_from([np.float64]),
         shape=(11,),
@@ -181,7 +233,10 @@
             allow_infinity=False,
         ),
     ),
-    e1=st.floats(min_value=0, max_value=1e3, allow_subnormal=False),
+    e1=st.floats(
+        min_value=0,
+        max_value=1e3,
+    ),
     e2=hnp.arrays(
         dtype=st.sampled_from([np.float64]),
         shape=(11,),
@@ -210,3 +265,29 @@
 
     u = op(u2, u1)
     np.testing.assert_almost_equal(u.value, op(u2.value, u1.value))
+
+
+@pytest.mark.filterwarnings("ignore::RuntimeWarning")
+@given(
+    v1=st.floats(
+        min_value=0,
+        max_value=1e3,
+    ),
+    e1=st.floats(
+        min_value=0,
+        max_value=1e3,
+    ),
+    op=st.sampled_from(
+        [
+            np.exp,
+            np.abs,
+            np.log,
+        ]
+    ),
+)
+def test_numpy_math_ops(v1, e1, op):
+    u1 = Uncertainty(v1, e1)
+
+    u = op(u1)
+    if np.isfinite(v1) and np.isfinite(u):
+        assert math.isclose(u.value, op(u1.value))