Skip to content

Commit

Permalink
Add warp_mem_limit and num_threads to rasterio reproject calls
Browse files Browse the repository at this point in the history
This commit aims to expose the two available parameters in rasterio
reproject method, used to control its performance
  • Loading branch information
Ethan Carrés committed Feb 3, 2021
1 parent 219dbdf commit 5003a83
Showing 1 changed file with 60 additions and 25 deletions.
85 changes: 60 additions & 25 deletions telluric/georaster.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def _dest_resolution(first_raster, crs):

def merge_all(rasters, roi=None, dest_resolution=None, merge_strategy=MergeStrategy.UNION,
shape=None, ul_corner=None, crs=None, pixel_strategy=PixelStrategy.FIRST,
resampling=Resampling.nearest):
resampling=Resampling.nearest, num_threads=1, warp_mem_limit=0):
"""Merge a list of rasters, cropping by a region of interest.
There are cases that the roi is not precise enough for this cases one can use,
the upper left corner the shape and crs to precisely define the roi.
Expand All @@ -149,7 +149,8 @@ def merge_all(rasters, roi=None, dest_resolution=None, merge_strategy=MergeStrat

# Create a list of single band rasters
all_band_names, projected_rasters = _prepare_rasters(rasters, merge_strategy, empty,
resampling=resampling)
resampling=resampling,
num_threads=num_threads, warp_mem_limit=warp_mem_limit)
assert len(projected_rasters) == len(rasters)

prepared_rasters = _apply_pixel_strategy(projected_rasters, pixel_strategy)
Expand Down Expand Up @@ -221,8 +222,14 @@ def key(rs):
return rasters_final


def _prepare_rasters(rasters, merge_strategy, first, resampling=Resampling.nearest):
# type: (List[GeoRaster2], MergeStrategy, GeoRaster2, Resampling) -> Tuple[IndexedSet[str], List[Optional[_Raster]]]
def _prepare_rasters(rasters, # type: List[GeoRaster2]
merge_strategy, # type: MergeStrategy
first, # type: GeoRaster2
resampling=Resampling.nearest, # type: Resampling
num_threads=1, # type: int
warp_mem_limit=0 # type: int
):
# type: (...) -> Tuple[IndexedSet[str], List[Optional[_Raster]]]
"""Prepares the rasters according to the baseline (first) raster and the merge strategy.
The baseline (first) raster is used to crop and reproject the other rasters,
Expand All @@ -234,7 +241,8 @@ def _prepare_rasters(rasters, merge_strategy, first, resampling=Resampling.neare
all_band_names = IndexedSet(first.band_names)
projected_rasters = []
for raster in rasters:
projected_raster = _prepare_other_raster(first, raster, resampling=resampling)
projected_raster = _prepare_other_raster(first, raster, resampling=resampling,
num_threads=num_threads, warp_mem_limit=warp_mem_limit)

# Modify the bands only if an intersecting raster was returned
if projected_raster:
Expand Down Expand Up @@ -265,8 +273,8 @@ def _explode_raster(raster, band_names=[]):
return [_Raster(image=raster.bands_data([band_name]), band_names=[band_name]) for band_name in band_names]


def _prepare_other_raster(one, other, resampling=Resampling.nearest):
# type: (GeoRaster2, GeoRaster2, Resampling) -> Union[_Raster, None]
def _prepare_other_raster(one, other, resampling=Resampling.nearest, num_threads=1, warp_mem_limit=0):
# type: (GeoRaster2, GeoRaster2, Resampling, int, int) -> Union[_Raster, None]
# Crop and reproject the second raster, if necessary
if not (one.crs == other.crs and one.affine.almost_equals(other.affine) and one.shape == other.shape):
if one.footprint().intersects(other.footprint()):
Expand All @@ -284,7 +292,7 @@ def _prepare_other_raster(one, other, resampling=Resampling.nearest):

other = other._reproject(new_width=one.width, new_height=one.height,
dest_affine=one.affine, dst_crs=one.crs,
resampling=resampling)
resampling=resampling, num_threads=num_threads, warp_mem_limit=warp_mem_limit)

else:
return None
Expand Down Expand Up @@ -371,8 +379,9 @@ def _stack_bands(one, other):
return _Raster(image=new_image, band_names=new_bands)


def merge_two(one, other, merge_strategy=MergeStrategy.UNION, silent=False, pixel_strategy=PixelStrategy.FIRST):
# type: (GeoRaster2, GeoRaster2, MergeStrategy, bool, PixelStrategy) -> GeoRaster2
def merge_two(one, other, merge_strategy=MergeStrategy.UNION, silent=False, pixel_strategy=PixelStrategy.FIRST,
num_threads=1, warp_mem_limit=0):
# type: (GeoRaster2, GeoRaster2, MergeStrategy, bool, PixelStrategy, int, int) -> GeoRaster2
"""Merge two rasters into one.
Parameters
Expand All @@ -387,13 +396,19 @@ def merge_two(one, other, merge_strategy=MergeStrategy.UNION, silent=False, pixe
Whether to raise errors or return some result, default to False (raise errors).
pixel_strategy: PixelStrategy, optional
Pixel strategy, from :py:data:`telluric.georaster.PixelStrategy` (default to "top").
num_threads: Int, optional
Number of threads to be used by rasterio.warp.reproject method
warp_mem_limit: Int, optional
The warp operation memory limit in MB. Larger values allow the warp operation to be
carried out in fewer chunks. The amount of memory required to warp a 3-band uint8 2000 row x 2000 col
raster to a destination of the same size is approximately 56 MB. The default (0) means 64 MB with GDAL 2.2
Returns
-------
GeoRaster2
"""
other_res = _prepare_other_raster(one, other)
other_res = _prepare_other_raster(one, other, num_threads=num_threads, warp_mem_limit=warp_mem_limit)
if other_res is None:
if silent:
return one
Expand All @@ -405,7 +420,8 @@ def merge_two(one, other, merge_strategy=MergeStrategy.UNION, silent=False, pixe

# Create a list of single band rasters
# Cropping won't happen twice, since other was already cropped
all_band_names, projected_rasters = _prepare_rasters([other], merge_strategy, first=one)
all_band_names, projected_rasters = _prepare_rasters([other], merge_strategy, first=one,
num_threads=num_threads, warp_mem_limit=warp_mem_limit)

if not all_band_names and not silent:
raise ValueError("rasters have no bands in common, use another merge strategy")
Expand Down Expand Up @@ -1240,7 +1256,8 @@ def copy(self, mutable=False):

def copy_with(self, mutable=False, **kwargs):
"""Get a copy of this GeoRaster with some attributes changed. NOTE: image is shallow-copied!"""
init_args = {'affine': self.affine, 'crs': self.crs, 'band_names': self.band_names, 'nodata': self.nodata_value}
init_args = {'affine': self.affine, 'crs': self.crs,
'band_names': self.band_names, 'nodata': self.nodata_value}
init_args.update(kwargs)

# The image is a special case because we don't want to make a copy of a possibly big array
Expand Down Expand Up @@ -1278,7 +1295,7 @@ def res_xy(self):
return abs(self.affine[0]), abs(self.affine[4])

def resize(self, ratio=None, ratio_x=None, ratio_y=None, dest_width=None, dest_height=None, dest_resolution=None,
resampling=Resampling.cubic):
resampling=Resampling.cubic, num_threads=1, warp_mem_limit=0):
"""
Provide either ratio, or ratio_x and ratio_y, or dest_width and/or dest_height.
Expand All @@ -1302,9 +1319,9 @@ def resize(self, ratio=None, ratio_x=None, ratio_y=None, dest_width=None, dest_h
if ratio is not None:
ratio_x, ratio_y = ratio, ratio

return self._resize(ratio_x, ratio_y, resampling)
return self._resize(ratio_x, ratio_y, resampling, num_threads=num_threads, warp_mem_limit=warp_mem_limit)

def _resize(self, ratio_x, ratio_y, resampling):
def _resize(self, ratio_x, ratio_y, resampling, num_threads=1, warp_mem_limit=0):
"""Return raster resized by ratio."""
new_width = int(np.ceil(self.width * ratio_x))
new_height = int(np.ceil(self.height * ratio_y))
Expand All @@ -1314,7 +1331,8 @@ def _resize(self, ratio_x, ratio_y, resampling):
window = rasterio.windows.Window(0, 0, self.width, self.height)
resized_raster = self.get_window(window, xsize=new_width, ysize=new_height, resampling=resampling)
else:
resized_raster = self._reproject(new_width, new_height, dest_affine, resampling=resampling)
resized_raster = self._reproject(new_width, new_height, dest_affine, resampling=resampling,
num_threads=num_threads, warp_mem_limit=warp_mem_limit)
return resized_raster

def to_pillow_image(self, return_mask=False):
Expand Down Expand Up @@ -1345,7 +1363,7 @@ def _max_per_dtype(dtype):
return ret_val

def _reproject(self, new_width, new_height, dest_affine, dtype=None,
dst_crs=None, resampling=Resampling.cubic):
dst_crs=None, resampling=Resampling.cubic, num_threads=1, warp_mem_limit=0):
"""Return re-projected raster to new raster.
:param new_width: new raster width in pixels
Expand All @@ -1354,7 +1372,10 @@ def _reproject(self, new_width, new_height, dest_affine, dtype=None,
:param dtype: new raster dtype, default current dtype
:param dst_crs: new raster crs, default current crs
:param resampling: reprojection resampling method, default `cubic`
:param num_threads: number of threads to be used by rasterio.warp.reproject method
:param warp_mem_limit: The warp operation memory limit in MB. Larger values allow the warp operation to be
carried out in fewer chunks. The amount of memory required to warp a 3-band uint8 2000 row x 2000 col
raster to a destination of the same size is approximately 56 MB. The default (0) means 64 MB with GDAL 2.2
:return GeoRaster2
"""
if new_width == 0 or new_height == 0:
Expand Down Expand Up @@ -1383,7 +1404,8 @@ def _reproject(self, new_width, new_height, dest_affine, dtype=None,
dst_transform=dst_transform, src_crs=self.crs, dst_crs=dst_crs,
resampling=resampling, dest_alpha=alpha_band_idx,
init_dest_nodata=False, src_alpha=alpha_band_idx,
src_nodata=self.nodata_value)
src_nodata=self.nodata_value, num_threads=num_threads,
warp_mem_limit=warp_mem_limit,)
dest_image = np.ma.masked_array(dest_image[0:1, :, :], dest_image[1:2, :, :] == 0)
band_images.append(dest_image)
dest_image = np.ma.concatenate(band_images)
Expand All @@ -1394,7 +1416,7 @@ def _reproject(self, new_width, new_height, dest_affine, dtype=None,

def reproject(self, dst_crs=None, resolution=None, dimensions=None,
src_bounds=None, dst_bounds=None, target_aligned_pixels=False,
resampling=Resampling.cubic, creation_options=None, **kwargs):
resampling=Resampling.cubic, creation_options=None, num_threads=1, warp_mem_limit=0, **kwargs):
"""Return re-projected raster to new raster.
Parameters
Expand All @@ -1417,6 +1439,12 @@ def reproject(self, dst_crs=None, resolution=None, dimensions=None,
Reprojection resampling method. Default is `cubic`.
creation_options: dict, optional
Custom creation options.
num_threads: int, optional: number of threads to be used by rasterio.warp.reproject method
Default is 1
warp_mem_limit: int, optional: the warp operation memory limit in MB. Larger values allow the warp operation to
be carried out in fewer chunks. The amount of memory required to warp a 3-band uint8 2000 row x 2000 col
raster to a destination of the same size is approximately 56 MB. The default (0) means 64 MB with GDAL 2.2
Default is 0
kwargs: optional
Additional arguments passed to transformation function.
Expand Down Expand Up @@ -1447,7 +1475,8 @@ def reproject(self, dst_crs=None, resolution=None, dimensions=None,
target_aligned_pixels=target_aligned_pixels,
src_bounds=src_bounds, dst_bounds=dst_bounds)
new_raster = self._reproject(dst_width, dst_height, dst_transform,
dst_crs=dst_crs, resampling=resampling)
dst_crs=dst_crs, resampling=resampling, num_threads=num_threads,
warp_mem_limit=warp_mem_limit)

return new_raster

Expand Down Expand Up @@ -1726,7 +1755,8 @@ def vectorize(self, condition=None):

def __invert__(self):
"""Invert mask."""
return self.copy_with(image=np.ma.masked_array(self.image.data, np.logical_not(np.ma.getmaskarray(self.image))))
return self.copy_with(image=np.ma.masked_array(
self.image.data, np.logical_not(np.ma.getmaskarray(self.image))))

def mask(self, vector, mask_shape_nodata=False):
"""
Expand Down Expand Up @@ -1921,14 +1951,18 @@ def _get_tile_when_web_mercator_crs(self, x_tile, y_tile, zoom,
return self.get_window(window, bands=bands, xsize=256, ysize=256, masked=masked, affine=affine)

def get_tile(self, x_tile, y_tile, zoom,
bands=None, masked=None, resampling=Resampling.cubic):
bands=None, masked=None, resampling=Resampling.cubic, num_threads=1, warp_mem_limit=0):
"""Convert mercator tile to raster window.
:param x_tile: x coordinate of tile
:param y_tile: y coordinate of tile
:param zoom: zoom level
:param bands: list of indices of requested bands, default None which returns all bands
:param resampling: reprojection resampling method, default `cubic`
:param num_threads: number of threads to be used by rasterio.warp.reproject method
:param warp_mem_limit: The warp operation memory limit in MB. Larger values allow the warp operation to be
carried out in fewer chunks. The amount of memory required to warp a 3-band uint8 2000 row x 2000 col
raster to a destination of the same size is approximately 56 MB. The default (0) means 64 MB with GDAL 2.2
:return: GeoRaster2 of tile in WEB_MERCATOR_CRS
Expand Down Expand Up @@ -1961,7 +1995,8 @@ def get_tile(self, x_tile, y_tile, zoom,
bands=bands, resampling=resampling)
raster = raster.reproject(dst_crs=WEB_MERCATOR_CRS, resolution=resolution,
dst_bounds=roi_buffer.get_bounds(WEB_MERCATOR_CRS),
resampling=Resampling.cubic_spline)
resampling=Resampling.cubic_spline, num_threads=num_threads,
warp_mem_limit=warp_mem_limit)
# raster = raster.get_tile(x_tile, y_tile, zoom, bands, masked, resampling)
raster = raster.crop(roi).resize(dest_width=256, dest_height=256)
return raster
Expand Down

0 comments on commit 5003a83

Please sign in to comment.