Skip to content
This repository has been archived by the owner on Mar 6, 2023. It is now read-only.

Commit

Permalink
Merge pull request #11 from soxofaan/issue5-optional-deps
Browse files Browse the repository at this point in the history
make dask and xarray "optional dependencies"
  • Loading branch information
lforesta authored Sep 17, 2020
2 parents 787c93e + b5a4134 commit 58a547e
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 75 deletions.
27 changes: 19 additions & 8 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,27 @@ Installation
(You might want to use a different bootstrap python executable
instead of ``python3`` in this example.)

3. Install the package in the virtual environment using one of the following ways,
as you prefer:
3. Install the package in the virtual environment,
preferably through ``pip`` of your virtual environment::

- traditional way: ``python setup.py install``
- with pip: ``pip install .``
- if you plan to do development on the package itself,
install it in "development" mode with
``python setup.py develop`` or ``pip install -e .``
pip install .

If you plan to do development on the package itself,
install it in "development" mode with::

pip install -e .

If you plan to process xarray or dask arrays, you probably
have the corresponding libraries already installed in your virtual env,
and ``openeo_processes`` will handle them appropriately out of the box.
You can however also explicitly pull these libraries in as "extra" dependencies
when installing ``openeo_processes``.
For example with one of the following install commands::

pip install .[dask]
pip install .[xarray]
pip install .[dask,xarray]

(Note that in this step we are using python and pip from the virtual environment.)

4. Optionally run the tests::

Expand Down
7 changes: 3 additions & 4 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,11 @@ package_dir =
# Add here dependencies of your project (semicolon-separated), e.g.
# install_requires = numpy; scipy
install_requires =
dask[array]
numpy
xarray
xarray-extras
pandas

# Add here test requirements (semicolon-separated)
tests_require = pytest; pytest-cov
tests_require = pytest; pytest-cov; dask; xarray; xarray-extras

[options.packages.find]
where = src
Expand All @@ -45,6 +42,8 @@ exclude =
# Add here additional requirements for extra features, to install with:
# `pip install openeo_processes[PDF]` like:
# PDF = ReportLab; RXP
dask = dask[array]
xarray = xarray; xarray-extras

[test]
# py.test options when running `python setup.py test`
Expand Down
8 changes: 6 additions & 2 deletions src/openeo_processes/math.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import numpy as np
import xarray_extras as xar_addons
import builtins
import numpy as np

try:
import xarray_extras as xar_addons
except ImportError:
xar_addons = None

from openeo_processes.utils import process
from openeo_processes.comparison import is_empty
Expand Down
85 changes: 24 additions & 61 deletions src/openeo_processes/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
import xarray
import dask.array
import datetime
import re
import numpy as np
from datetime import timezone
Expand All @@ -12,14 +9,12 @@ def eval_datatype(data):
"""
Returns a data type tag depending on the data type of `data`.
This can be:
- "np": `np.ndarray` or list (list are directly converted to numpy arrays)
- "xar": `xarray.DataArray`
- "dar": `dask.array.core.Array`
- "num": `np.integer`, `np.float`, int, float, or str
- "dt": `datetime.datetime`
- "dict": dict
- "fun": callable object
- "none": None
- "numpy": `numpy.ndarray`
- "xarray": `xarray.DataArray`
- "dask": `dask.array.core.Array`
- "int", "float", "dict", "list", "set", "tuple", "NoneType": Python builtins
- "datetime": `datetime.datetime`
- "function": callable object
Parameters
----------
Expand All @@ -32,37 +27,14 @@ def eval_datatype(data):
Data type tag.
"""
is_list = isinstance(data, list)
is_np = isinstance(data, np.ndarray) | is_list
is_xar = isinstance(data, xarray.DataArray)
is_dar = isinstance(data, dask.array.core.Array)
is_num = isinstance(data, (int, float, str, np.integer, np.float))
is_datetime = isinstance(data, datetime)
is_dict = isinstance(data, dict)
is_function = callable(data)
is_none = data is None

if is_np:
datatype = "np"
elif is_xar:
datatype = "xar"
elif is_dar:
datatype = "dar"
elif is_num:
datatype = "num"
elif is_none:
datatype = "none"
elif is_datetime:
datatype = "dt"
elif is_dict:
datatype = "dict"
elif is_function:
datatype = "fun"
package = type(data).__module__
package_root = package.split(".", 1)[0]
if package in ("builtins", "datetime"):
return type(data).__name__
elif package_root in ("numpy", "xarray", "dask"):
return package_root
else:
err_msg = "Data type '{}' is unknown.".format(type(data))
raise ValueError(err_msg)

return datatype
return package + '.' + type(data).__name__


def process(processor):
Expand All @@ -84,29 +56,20 @@ def process(processor):
def fun_wrapper(*args, **kwargs):
cls = processor()

datatypes = []
# retrieve data types of input arguments and convert lists to numpy arrays
args = list(args)
for i, arg in enumerate(args):
datatypes.append(eval_datatype(arg))
if isinstance(arg, list):
args[i] = list2nparray(arg)
args = tuple(args)

# retrieve data types of input keyword arguments and convert lists to numpy arrays
for key, kwarg in kwargs.items():
datatypes.append(eval_datatype(kwarg))
if isinstance(kwarg, list):
kwargs[key] = list2nparray(kwarg)

datatypes = np.array(datatypes)
if (datatypes == "np").any():
# Convert lists to numpy arrays
args = tuple(list2nparray(a) if isinstance(a, list) else a for a in args)
kwargs = {k: (list2nparray(v) if isinstance(v, list) else v) for k, v in kwargs.items()}

# retrieve data types of input (keyword) arguments
datatypes = set(eval_datatype(a) for a in args)
datatypes.update(eval_datatype(v) for v in kwargs.values())
if "numpy" in datatypes:
cls_fun = getattr(cls, "exec_np")
elif (datatypes == "xar").any():
elif "xarray" in datatypes:
cls_fun = getattr(cls, "exec_xar")
elif (datatypes == "dar").any():
elif "dask" in datatypes:
cls_fun = getattr(cls, "exec_dar")
elif ((datatypes == "num") | (datatypes == "none") | (datatypes == "dt")).all():
elif datatypes.issubset({"int", "float", "NoneType", "str", "bool", "datetime"}):
cls_fun = getattr(cls, "exec_num")
else:
raise Exception('Datatype unknown.')
Expand Down
28 changes: 28 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import datetime

import dask.array.core
import numpy as np
import pytest
import xarray
from openeo_processes.utils import eval_datatype


@pytest.mark.parametrize(
    ["data", "expected"],
    [
        # Python builtins are expected to be tagged with their type name.
        (None, "NoneType"),
        (True, "bool"),
        (False, "bool"),
        ("123", "str"),
        (123, "int"),
        (123.456, "float"),
        # Builtin containers.
        ([1, 2, 3], "list"),
        ((1, 2, 3), "tuple"),
        ({1, 2, 3}, "set"),
        ({1: 2, 3: 4}, "dict"),
        # Callables and datetime objects.
        (lambda x, y: x + y, "function"),
        (datetime.datetime.now(), "datetime"),
        # Array types are expected to be tagged with their top-level package name.
        (np.array([1, 2, 3]), "numpy"),
        (xarray.DataArray([1, 2, 3]), "xarray"),
        (dask.array.core.from_array([1, 2, 3]), "dask"),
    ],
)
def test_eval_datatype(data, expected):
    """Check that `eval_datatype` returns the expected tag for `data`."""
    assert eval_datatype(data) == expected

0 comments on commit 58a547e

Please sign in to comment.