diff --git a/README.rst b/README.rst index 42059881..260081aa 100644 --- a/README.rst +++ b/README.rst @@ -30,16 +30,27 @@ Installation (You might want to use a different bootstrap python executable instead of ``python3`` in this example.) -3. Install the package in the virtual environment using one of the following ways, - as you prefer: +3. Install the package in the virtual environment, + preferably through ``pip`` of your virtual environment:: - - traditional way: ``python setup.py install`` - - with pip: ``pip install .`` - - if you plan to do development on the package itself, - install it in "development" mode with - ``python setup.py develop`` or ``pip install -e .`` + pip install . + + If you plan to do development on the package itself, + install it in "development" mode with:: + + pip install -e . + + If you plan to process xarray or dask arrays, you probably + have the corresponding libraries already installed in your virtual env, + and ``openeo_processes`` will handle them appropriately out of the box. + You can however also explicitly pull these libraries in as "extra" dependencies + when installing ``openeo_processes``. + For example with one of the following install commands:: + + pip install .[dask] + pip install .[xarray] + pip install .[dask,xarray] - (Note that in this step we are using python and pip from the virtual environment.) 4. Optionally run the tests:: diff --git a/setup.cfg b/setup.cfg index 9aeeb9de..80058e61 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,14 +27,11 @@ package_dir = # Add here dependencies of your project (semicolon-separated), e.g. 
# install_requires = numpy; scipy install_requires = - dask[array] numpy - xarray - xarray-extras pandas # Add here test requirements (semicolon-separated) -tests_require = pytest; pytest-cov +tests_require = pytest; pytest-cov; dask; xarray; xarray-extras [options.packages.find] where = src @@ -45,6 +42,8 @@ exclude = # Add here additional requirements for extra features, to install with: # `pip install openeo_processes[PDF]` like: # PDF = ReportLab; RXP +dask = dask[array] +xarray = xarray; xarray-extras [test] # py.test options when running `python setup.py test` diff --git a/src/openeo_processes/math.py b/src/openeo_processes/math.py index a3f97aad..147535df 100644 --- a/src/openeo_processes/math.py +++ b/src/openeo_processes/math.py @@ -1,6 +1,10 @@ -import numpy as np -import xarray_extras as xar_addons import builtins +import numpy as np + +try: + import xarray_extras as xar_addons +except ImportError: + xar_addons = None from openeo_processes.utils import process from openeo_processes.comparison import is_empty diff --git a/src/openeo_processes/utils.py b/src/openeo_processes/utils.py index e9df0852..5828c55e 100644 --- a/src/openeo_processes/utils.py +++ b/src/openeo_processes/utils.py @@ -1,6 +1,3 @@ -import xarray -import dask.array -import datetime import re import numpy as np from datetime import timezone @@ -12,14 +9,12 @@ def eval_datatype(data): """ Returns a data type tag depending on the data type of `data`. 
This can be: - - "np": `np.ndarray` or list (list are directly converted to numpy arrays) - - "xar": `xarray.DataArray` - - "dar": `dask.array.core.Array` - - "num": `np.integer`, `np.float`, int, float, or str - - "dt": `datetime.datetime` - - "dict": dict - - "fun": callable object - - "none": None + - "numpy": `numpy.ndarray` + - "xarray": `xarray.DataArray` + - "dask": `dask.array.core.Array` + - "int", "float", "str", "bool", "dict", "list", "set", "tuple", "NoneType": Python builtins + - "datetime": `datetime.datetime` + - "function": callable object Parameters ---------- @@ -32,37 +27,14 @@ def eval_datatype(data): Data type tag. """ - is_list = isinstance(data, list) - is_np = isinstance(data, np.ndarray) | is_list - is_xar = isinstance(data, xarray.DataArray) - is_dar = isinstance(data, dask.array.core.Array) - is_num = isinstance(data, (int, float, str, np.integer, np.float)) - is_datetime = isinstance(data, datetime) - is_dict = isinstance(data, dict) - is_function = callable(data) - is_none = data is None - - if is_np: - datatype = "np" - elif is_xar: - datatype = "xar" - elif is_dar: - datatype = "dar" - elif is_num: - datatype = "num" - elif is_none: - datatype = "none" - elif is_datetime: - datatype = "dt" - elif is_dict: - datatype = "dict" - elif is_function: - datatype = "fun" + package = type(data).__module__ + package_root = package.split(".", 1)[0] + if package in ("builtins", "datetime"): + return type(data).__name__ + elif package_root in ("numpy", "xarray", "dask"): + return package_root else: - err_msg = "Data type '{}' is unknown.".format(type(data)) - raise ValueError(err_msg) - - return datatype + return package + '.' 
+ type(data).__name__ def process(processor): @@ -84,29 +56,20 @@ def process(processor): def fun_wrapper(*args, **kwargs): cls = processor() - datatypes = [] - # retrieve data types of input arguments and convert lists to numpy arrays - args = list(args) - for i, arg in enumerate(args): - datatypes.append(eval_datatype(arg)) - if isinstance(arg, list): - args[i] = list2nparray(arg) - args = tuple(args) - - # retrieve data types of input keyword arguments and convert lists to numpy arrays - for key, kwarg in kwargs.items(): - datatypes.append(eval_datatype(kwarg)) - if isinstance(kwarg, list): - kwargs[key] = list2nparray(kwarg) - - datatypes = np.array(datatypes) - if (datatypes == "np").any(): + # Convert lists to numpy arrays + args = tuple(list2nparray(a) if isinstance(a, list) else a for a in args) + kwargs = {k: (list2nparray(v) if isinstance(v, list) else v) for k, v in kwargs.items()} + + # retrieve data types of input (keyword) arguments + datatypes = set(eval_datatype(a) for a in args) + datatypes.update(eval_datatype(v) for v in kwargs.values()) + if "numpy" in datatypes: cls_fun = getattr(cls, "exec_np") - elif (datatypes == "xar").any(): + elif "xarray" in datatypes: cls_fun = getattr(cls, "exec_xar") - elif (datatypes == "dar").any(): + elif "dask" in datatypes: cls_fun = getattr(cls, "exec_dar") - elif ((datatypes == "num") | (datatypes == "none") | (datatypes == "dt")).all(): + elif datatypes.issubset({"int", "float", "NoneType", "str", "bool", "datetime"}): cls_fun = getattr(cls, "exec_num") else: raise Exception('Datatype unknown.') diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..5e4b022d --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,28 @@ +import datetime + +import dask.array.core +import numpy as np +import pytest +import xarray +from openeo_processes.utils import eval_datatype + + +@pytest.mark.parametrize(["data", "expected"], [ + (None, "NoneType"), + (True, "bool"), + (False, "bool"), + 
("123", "str"), + (123, "int"), + (123.456, "float"), + ([1, 2, 3], "list"), + ((1, 2, 3), "tuple"), + ({1, 2, 3}, "set"), + ({1: 2, 3: 4}, "dict"), + (lambda x, y: x + y, "function"), + (datetime.datetime.now(), "datetime"), + (np.array([1, 2, 3]), "numpy"), + (xarray.DataArray([1, 2, 3]), "xarray"), + (dask.array.core.from_array([1, 2, 3]), "dask"), +]) +def test_eval_datatype(data, expected): + assert eval_datatype(data) == expected