diff --git a/CHANGELOG.md b/CHANGELOG.md index fdf0bb791..ddde0c7f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ - Improve writing to zarr sinks from multiple processes ([#1713](../../pull/1713)) - Slightly faster GDAL validateCOG ([#1761](../../pull/1761)) - Improve clearing caches ([#1766](../../pull/1766)) +- Harden many of the source reads ([#1768](../../pull/1768)) ### Changes diff --git a/large_image/tilesource/geo.py b/large_image/tilesource/geo.py index f2bffae6f..503e24706 100644 --- a/large_image/tilesource/geo.py +++ b/large_image/tilesource/geo.py @@ -1,3 +1,4 @@ +import math import pathlib from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast from urllib.parse import urlencode, urlparse @@ -186,7 +187,7 @@ def _setDefaultStyle(self) -> None: self._bandNames = {} for idx, band in self.getBandInformation().items(): if band.get('interpretation'): - self._bandNames[band['interpretation'].lower()] = idx + self._bandNames[str(band['interpretation']).lower()] = idx if isinstance(getattr(self, '_style', None), dict) and ( not self._style or 'icc' in self._style and len(self._style) == 1): return @@ -277,6 +278,8 @@ def getNativeMagnification(self) -> Dict[str, Optional[float]]: :return: width of a pixel in mm, height of a pixel in mm. """ scale = self.getPixelSizeInMeters() + if scale and not math.isfinite(scale): + scale = None return { 'magnification': None, 'mm_x': scale * 100 if scale else None, diff --git a/sources/bioformats/large_image_source_bioformats/__init__.py b/sources/bioformats/large_image_source_bioformats/__init__.py index 5679cf0b4..def578d1d 100644 --- a/sources/bioformats/large_image_source_bioformats/__init__.py +++ b/sources/bioformats/large_image_source_bioformats/__init__.py @@ -62,7 +62,7 @@ # Default to ignoring files with no extension and some specific extensions. 
-config.ConfigValues['source_bioformats_ignored_names'] = r'(^[^.]*|\.(jpg|jpeg|jpe|png|tif|tiff|ndpi|nd2|ome|nc|json|geojson|isyntax|mrxs|zip|zarr(\.db|\.zip)))$' # noqa +config.ConfigValues['source_bioformats_ignored_names'] = r'(^[^.]*|\.(jpg|jpeg|jpe|png|tif|tiff|ndpi|nd2|ome|nc|json|geojson|fits|isyntax|mrxs|zip|zarr(\.db|\.zip)))$' # noqa def _monitor_thread(): diff --git a/sources/gdal/large_image_source_gdal/__init__.py b/sources/gdal/large_image_source_gdal/__init__.py index 994af4555..b6aaefc6d 100644 --- a/sources/gdal/large_image_source_gdal/__init__.py +++ b/sources/gdal/large_image_source_gdal/__init__.py @@ -99,7 +99,8 @@ class GDALFileTileSource(GDALBaseFileTileSource, metaclass=LruCacheMetaclass): cacheName = 'tilesource' name = 'gdal' - VECTOR_IMAGE_SIZE = 256 * 1024 + VECTOR_IMAGE_SIZE = 256 * 1024 # for vector files without projections + PROJECTED_VECTOR_IMAGE_SIZE = 32 * 1024 # if the file has a projection def __init__(self, path, projection=None, unitsPerPixel=None, **kwargs): # noqa """ @@ -155,7 +156,7 @@ def __init__(self, path, projection=None, unitsPerPixel=None, **kwargs): # noqa is_netcdf = self._checkNetCDF() try: scale = self.getPixelSizeInMeters() - except RuntimeError as exc: + except (RuntimeError, ZeroDivisionError) as exc: raise TileSourceError('File cannot be opened via GDAL: %r' % exc) if not self.sizeX or not self.sizeY: msg = 'File cannot be opened via GDAL (no size)' @@ -192,7 +193,8 @@ def _openVectorSource(self, vec): except Exception: proj = None # Define raster parameters - pixel_size = max(x_max - x_min, y_max - y_min) / self.VECTOR_IMAGE_SIZE + pixel_size = max(x_max - x_min, y_max - y_min) / ( + self.VECTOR_IMAGE_SIZE if proj is None else self.PROJECTED_VECTOR_IMAGE_SIZE) if not pixel_size: msg = 'Cannot determine dimensions' raise RuntimeError(msg) @@ -212,6 +214,8 @@ def _openVectorSource(self, vec): ds.SetGeoTransform((x_min, pixel_size, 0, y_min, 0, pixel_size)) if proj: ds.SetProjection(proj) + msg = 
f'Rasterizing a vector layer to {x_res} x {y_res}' + self.logger.info(msg) gdal.RasterizeLayer(ds, [1], layer, burn_values=[255]) if not hasattr(self.__class__, '_openVectorLock'): self.__class__._openVectorLock = threading.RLock() diff --git a/sources/mapnik/large_image_source_mapnik/__init__.py b/sources/mapnik/large_image_source_mapnik/__init__.py index df4b835cd..467536fae 100644 --- a/sources/mapnik/large_image_source_mapnik/__init__.py +++ b/sources/mapnik/large_image_source_mapnik/__init__.py @@ -97,6 +97,15 @@ def __init__(self, path, projection=None, unitsPerPixel=None, **kwargs): projection = projection.lower() super().__init__( path, projection=projection, unitsPerPixel=unitsPerPixel, **kwargs) + if self.dataset.GetDriver().ShortName in {'MBTiles', 'Rasterlite', 'SQLite'}: + msg = 'File will not be opened via mapnik' + raise TileSourceError(msg) + self.logger.debug('mapnik source using the GDAL %s driver', + self.dataset.GetDriver().ShortName) + + def _openVectorSource(self, ds): + msg = 'File will not be opened via mapnik' + raise TileSourceError(msg) def _checkNetCDF(self): """ diff --git a/sources/pil/large_image_source_pil/__init__.py b/sources/pil/large_image_source_pil/__init__.py index 17b150f06..de8416a33 100644 --- a/sources/pil/large_image_source_pil/__init__.py +++ b/sources/pil/large_image_source_pil/__init__.py @@ -141,7 +141,7 @@ def __init__(self, path, maxSize=None, **kwargs): # noqa if self._pilImage is None: try: self._pilImage = PIL.Image.open(largeImagePath) - except (OSError, ValueError): + except (OSError, ValueError, NotImplementedError): if not os.path.isfile(largeImagePath): raise TileSourceFileNotFoundError(largeImagePath) from None msg = 'File cannot be opened via PIL.' @@ -178,7 +178,11 @@ def __init__(self, path, maxSize=None, **kwargs): # noqa except Exception: msg = 'PIL cannot find loader for this file.' 
raise TileSourceError(msg) - maxval = 256 ** math.ceil(math.log(float(np.max(imgdata)) + 1, 256)) - 1 + try: + maxval = 256 ** math.ceil(math.log(float(np.max(imgdata)) + 1, 256)) - 1 + except Exception: + msg = 'PIL cannot load this file.' + raise TileSourceError(msg) self._factor = 255.0 / max(maxval, 1) self._pilImage = PIL.Image.fromarray(np.uint8(np.multiply( imgdata, self._factor))) diff --git a/sources/rasterio/large_image_source_rasterio/__init__.py b/sources/rasterio/large_image_source_rasterio/__init__.py index d3ed26960..973769e10 100644 --- a/sources/rasterio/large_image_source_rasterio/__init__.py +++ b/sources/rasterio/large_image_source_rasterio/__init__.py @@ -642,7 +642,11 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs): width=self.tileWidth, add_alpha=add_alpha, ) as vrt: - tile = vrt.read(resampling=rio.enums.Resampling.nearest) + try: + tile = vrt.read(resampling=rio.enums.Resampling.nearest) + except Exception: + self.logger.exception('Failed to getTile') + tile = np.zeros((1, 1)) # necessary for multispectral images: # set the coordinates first and the bands at the end diff --git a/test/lisource_compare.py b/test/lisource_compare.py index 61cdd683f..9dc3d5f76 100755 --- a/test/lisource_compare.py +++ b/test/lisource_compare.py @@ -208,14 +208,16 @@ def source_compare(sourcePath, opts): # noqa '_geospatial_source', None): continue result = results['styles'][-1]['sources'][source] = {} - sys.stdout.write('%s' % (source + ' ' * (slen - len(source)))) - sys.stdout.flush() large_image.cache_util.cachesClear() try: t = time.time() ts = large_image.tilesource.AvailableTileSources[source](sourcePath, **kwargs) opentime = time.time() - t except Exception as exp: + if opts.can_read and projection and None in projections: + continue + sys.stdout.write('%s' % (source + ' ' * (slen - len(source)))) + sys.stdout.flush() result['exception'] = str(exp) result['error'] = 'open' sexp = str(exp).replace('\n', ' ').replace(' ', 
' ').strip() @@ -224,6 +226,8 @@ def source_compare(sourcePath, opts): # noqa sys.stdout.write('%s %s\n' % (' ' * slen, sexp[78 - slen: 2 * (78 - slen)])) sys.stdout.flush() continue + sys.stdout.write('%s' % (source + ' ' * (slen - len(source)))) + sys.stdout.flush() sizeX, sizeY = ts.sizeX, ts.sizeY result['sizeX'], result['sizeY'] = ts.sizeX, ts.sizeY try: @@ -304,7 +308,13 @@ def source_compare(sourcePath, opts): # noqa sys.stdout.flush() write_thumb(img[0], source, thumbs, 'thumbnail', opts, styleidx, projidx) t = time.time() - img = ts.getTile(tx0, ty0, tz0, sparseFallback=True) + try: + img = ts.getTile(tx0, ty0, tz0, sparseFallback=True) + except Exception as exp: + result['exception'] = str(exp) + result['error'] = 'gettile' + sys.stdout.write(' fail\n') + continue tile0time = time.time() - t result['tile0time'] = tile0time sys.stdout.write(' %8.3fs' % tile0time) @@ -395,11 +405,19 @@ def source_compare(sourcePath, opts): # noqa onlyMinMax=True, output=dict(maxWidth=2048, maxHeight=2048), resample=0, **kwargs) if 'max' not in h: + result['error'] = 'max' + sys.stdout.write(' fail\n') + sys.stdout.flush() + continue + try: + maxval = max(h['max'].tolist()) + maxval = 2 ** (int(math.log(maxval or 1) / math.log(2)) + 1) if maxval > 1 else 1 + except (TypeError, OverflowError) as exp: + result['exception'] = str(exp) + result['error'] = 'maxval' sys.stdout.write(' fail\n') sys.stdout.flush() continue - maxval = max(h['max'].tolist()) - maxval = 2 ** (int(math.log(maxval or 1) / math.log(2)) + 1) if maxval > 1 else 1 # thumbnail histogram h = ts.histogram(bins=9, output=dict(maxWidth=256, maxHeight=256), range=[0, maxval], resample=0, **kwargs)