Skip to content

Commit

Permalink
Specify dtype in map_complete_blocks and undo change to lazy_elementwise
Browse files Browse the repository at this point in the history
  • Loading branch information
bouweandela committed Jul 10, 2024
1 parent 7f77d6a commit c1e5e1a
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 54 deletions.
25 changes: 11 additions & 14 deletions lib/iris/_lazy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,15 +536,13 @@ def lazy_elementwise(lazy_array, elementwise_op):
# This makes good practical sense for unit conversions, as a Unit.convert
# call may cast to float, or not, depending on unit equality : Thus, it's
# much safer to get udunits to decide that for us.
meta = da.utils.meta_from_array(lazy_array)
new_meta = elementwise_op(meta)
dtype = elementwise_op(np.zeros(1, lazy_array.dtype)).dtype
meta = da.utils.meta_from_array(lazy_array).astype(dtype)

return da.map_blocks(
elementwise_op, lazy_array, dtype=new_meta.dtype, meta=new_meta
)
return da.map_blocks(elementwise_op, lazy_array, dtype=dtype, meta=meta)


def map_complete_blocks(src, func, dims, out_sizes, *args, **kwargs):
def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs):
"""Apply a function to complete blocks.
Complete means that the data is not chunked along the chosen dimensions.
Expand All @@ -560,6 +558,8 @@ def map_complete_blocks(src, func, dims, out_sizes, *args, **kwargs):
Dimensions that cannot be chunked.
out_sizes : tuple of int
Output size of dimensions that cannot be chunked.
dtype :
Output dtype.
*args : tuple
Additional arguments to pass to `func`.
**kwargs : dict
Expand Down Expand Up @@ -599,14 +599,11 @@ def map_complete_blocks(src, func, dims, out_sizes, *args, **kwargs):
for dim, size in zip(dims, out_sizes):
out_chunks[dim] = size

# Assume operation does not change dtype and meta if not specified.
if "meta" not in kwargs:
kwargs["meta"] = da.utils.meta_from_array(data)
if "dtype" in kwargs:
kwargs["meta"] = kwargs["meta"].astype(kwargs["dtype"])
else:
kwargs["dtype"] = kwargs["meta"].dtype
# Assume operation preserves mask.
meta = da.utils.meta_from_array(data).astype(dtype)

result = data.map_blocks(func, *args, chunks=out_chunks, **kwargs)
result = data.map_blocks(
func, *args, chunks=out_chunks, meta=meta, dtype=dtype, **kwargs
)

return result
7 changes: 4 additions & 3 deletions lib/iris/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1390,9 +1390,10 @@ def _percentile(data, percent, fast_percentile_method=False, **kwargs):

result = iris._lazy_data.map_complete_blocks(
data,
_calc_percentile,
(-1,),
percent.shape,
func=_calc_percentile,
dims=(-1,),
out_sizes=percent.shape,
dtype=np.float64,
percent=percent,
fast_percentile_method=fast_percentile_method,
**kwargs,
Expand Down
13 changes: 10 additions & 3 deletions lib/iris/analysis/_area_weighted.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,11 +392,18 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform(

tgt_shape = (len(grid_y.points), len(grid_x.points))

# Specify the output dtype
if np.issubdtype(src_cube.dtype, np.integer):
out_dtype = np.float64
else:
out_dtype = src_cube.dtype

new_data = map_complete_blocks(
src_cube,
_regrid_along_dims,
(src_y_dim, src_x_dim),
meshgrid_x.shape,
func=_regrid_along_dims,
dims=(src_y_dim, src_x_dim),
out_sizes=meshgrid_x.shape,
dtype=out_dtype,
x_dim=src_x_dim,
y_dim=src_y_dim,
weights=weights,
Expand Down
13 changes: 10 additions & 3 deletions lib/iris/analysis/_regrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -935,11 +935,18 @@ def __call__(self, src):
x_dim = src.coord_dims(src_x_coord)[0]
y_dim = src.coord_dims(src_y_coord)[0]

# Specify the output dtype
if self._method == "linear" and np.issubdtype(src.dtype, np.integer):
out_dtype = np.float64
else:
out_dtype = src.dtype

data = map_complete_blocks(
src,
self._regrid,
(y_dim, x_dim),
sample_grid_x.shape,
func=self._regrid,
dims=(y_dim, x_dim),
out_sizes=sample_grid_x.shape,
dtype=out_dtype,
x_dim=x_dim,
y_dim=y_dim,
src_x_coord=src_x_coord,
Expand Down
12 changes: 6 additions & 6 deletions lib/iris/tests/unit/analysis/regrid/test_RectilinearRegridder.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,19 +480,19 @@ def setUp(self):
def test_lazy_regrid(self):
result = self.lazy_regridder(self.lazy_cube)
self.assertTrue(result.has_lazy_data())
self.assertTrue(
isinstance(da.utils.meta_from_array(result.core_data()), np.ndarray)
)
meta = da.utils.meta_from_array(result.core_data())
self.assertTrue(meta.__class__ is np.ndarray)
expected = self.regridder(self.cube)
self.assertEqual(result.dtype, expected.dtype)
self.assertTrue(result == expected)

def test_lazy_masked_regrid(self):
result = self.lazy_regridder(self.lazy_masked_cube)
self.assertTrue(result.has_lazy_data())
self.assertTrue(
isinstance(da.utils.meta_from_array(result.core_data()), np.ma.MaskedArray)
)
meta = da.utils.meta_from_array(result.core_data())
self.assertTrue(isinstance(meta, np.ma.MaskedArray))
expected = self.regridder(self.cube)
self.assertEqual(result.dtype, expected.dtype)
self.assertTrue(result == expected)


Expand Down
47 changes: 22 additions & 25 deletions lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ def func(chunk):
def test_non_lazy_input(self):
# Check that a non-lazy input doesn't trip up the functionality.
cube, cube_data = create_mock_cube(self.array)
result = map_complete_blocks(cube, self.func, dims=(1,), out_sizes=(4,))
result = map_complete_blocks(
cube, self.func, dims=(1,), out_sizes=(4,), dtype=self.array.dtype
)
self.assertFalse(is_lazy_data(result))
self.assertArrayEqual(result, self.func_result)
# check correct data was accessed
Expand All @@ -60,7 +62,9 @@ def test_non_lazy_input(self):
def test_lazy_input(self):
lazy_array = da.asarray(self.array, chunks=((1, 1), (4,)))
cube, cube_data = create_mock_cube(lazy_array)
result = map_complete_blocks(cube, self.func, dims=(1,), out_sizes=(4,))
result = map_complete_blocks(
cube, self.func, dims=(1,), out_sizes=(4,), dtype=lazy_array.dtype
)
self.assertTrue(is_lazy_data(result))
self.assertArrayEqual(result.compute(), self.func_result)
# check correct data was accessed
Expand All @@ -69,35 +73,22 @@ def test_lazy_input(self):

def test_dask_array_input(self):
lazy_array = da.asarray(self.array, chunks=((1, 1), (4,)))
result = map_complete_blocks(lazy_array, self.func, dims=(1,), out_sizes=(4,))
result = map_complete_blocks(
lazy_array, self.func, dims=(1,), out_sizes=(4,), dtype=lazy_array.dtype
)
self.assertTrue(is_lazy_data(result))
self.assertArrayEqual(result.compute(), self.func_result)

def test_dask_masked_array_input(self):
array = da.ma.masked_array(np.arange(2), mask=np.arange(2))
result = map_complete_blocks(array, self.func, dims=tuple(), out_sizes=tuple())
result = map_complete_blocks(
array, self.func, dims=tuple(), out_sizes=tuple(), dtype=array.dtype
)
self.assertTrue(is_lazy_data(result))
self.assertTrue(isinstance(da.utils.meta_from_array(result), np.ma.MaskedArray))
self.assertArrayEqual(result.compute(), np.ma.masked_array([1, 2], mask=[0, 1]))

def test_dask_array_input_with_meta(self):
lazy_array = da.asarray(self.array, chunks=((1, 1), (4,)))
meta = np.empty((), dtype=np.float32)

def func(chunk):
if chunk.size == 0:
raise ValueError
return (chunk + 1).astype(np.float32)

result = map_complete_blocks(
lazy_array, func, dims=(1,), out_sizes=(4,), meta=meta
)
self.assertTrue(isinstance(da.utils.meta_from_array(result), np.ndarray))
self.assertTrue(result.dtype == meta.dtype)
self.assertTrue(result.compute().dtype == meta.dtype)
self.assertArrayEqual(result.compute(), self.func_result)

def test_dask_array_input_with_dtype(self):
def test_dask_array_input_with_different_output_dtype(self):
lazy_array = da.ma.masked_array(self.array, chunks=((1, 1), (4,)))
dtype = np.float32

Expand All @@ -117,7 +108,9 @@ def func(chunk):
def test_rechunk(self):
lazy_array = da.asarray(self.array, chunks=((1, 1), (2, 2)))
cube, _ = create_mock_cube(lazy_array)
result = map_complete_blocks(cube, self.func, dims=(1,), out_sizes=(4,))
result = map_complete_blocks(
cube, self.func, dims=(1,), out_sizes=(4,), dtype=lazy_array.dtype
)
self.assertTrue(is_lazy_data(result))
self.assertArrayEqual(result.compute(), self.func_result)

Expand All @@ -129,15 +122,19 @@ def func(_):
return np.arange(2).reshape(1, 2)

func_result = [[0, 1], [0, 1]]
result = map_complete_blocks(cube, func, dims=(1,), out_sizes=(2,))
result = map_complete_blocks(
cube, func, dims=(1,), out_sizes=(2,), dtype=lazy_array.dtype
)
self.assertTrue(is_lazy_data(result))
self.assertArrayEqual(result.compute(), func_result)

def test_multidimensional_input(self):
array = np.arange(2 * 3 * 4).reshape(2, 3, 4)
lazy_array = da.asarray(array, chunks=((1, 1), (1, 2), (4,)))
cube, _ = create_mock_cube(lazy_array)
result = map_complete_blocks(cube, self.func, dims=(1, 2), out_sizes=(3, 4))
result = map_complete_blocks(
cube, self.func, dims=(1, 2), out_sizes=(3, 4), dtype=lazy_array.dtype
)
self.assertTrue(is_lazy_data(result))
self.assertArrayEqual(result.compute(), array + 1)

Expand Down

0 comments on commit c1e5e1a

Please sign in to comment.