diff --git a/conda/meta.yaml b/conda/meta.yaml index 0b73ad9..75eaa86 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -28,6 +28,7 @@ test: - tests requires: - aiohttp + - pyarrow - pytest - requests commands: diff --git a/intake_geopandas/__init__.py b/intake_geopandas/__init__.py index d21d7f3..383d773 100644 --- a/intake_geopandas/__init__.py +++ b/intake_geopandas/__init__.py @@ -2,15 +2,23 @@ import intake from ._version import get_versions -from .geopandas import GeoJSONSource, PostGISSource, ShapefileSource, SpatiaLiteSource +from .geopandas import ( + GeoJSONSource, + GeoPandasFileSource, + GeoParquetSource, + PostGISSource, + ShapefileSource, + SpatiaLiteSource, +) from .regionmask import RegionmaskSource __version__ = get_versions()['version'] del get_versions - __all__ = [ 'GeoJSONSource', + 'GeoPandasFileSource', + 'GeoParquetSource', 'PostGISSource', 'ShapefileSource', 'SpatiaLiteSource', diff --git a/intake_geopandas/geopandas.py b/intake_geopandas/geopandas.py index 92da280..0ea7708 100644 --- a/intake_geopandas/geopandas.py +++ b/intake_geopandas/geopandas.py @@ -57,6 +57,8 @@ def _close(self): class GeoPandasFileSource(GeoPandasSource): + name="geopandasfile" + def __init__( self, urlpath, @@ -67,6 +69,11 @@ def __init__( metadata=None, ): """ + A source for a file opened by geopandas. Specializations of this are provided + for shapefiles and geojson, but this base class can also be used directly + for other file types by providing an OGR driver to `geopandas_kwargs`, e.g. + `geopandas_kwargs={"driver": "GPKG"}` to open a geopackage. + Parameters ---------- urlpath : str or iterable, location of data @@ -149,6 +156,27 @@ def _resolve_single_file(self, filelist): ) +class GeoParquetSource(GeoPandasFileSource): + name = "geoparquet" + + def _open_dataset(self): + """ + Open dataset using geopandas. + """ + if self._use_fsspec: + with fsspec.open_files(self.urlpath, **self._storage_options) as f: + f = self._resolve_single_file(f) if len(f) > 1 else f[0] + self._dataframe = geopandas.read_parquet( + f, + **self._geopandas_kwargs, + ) + else: + self._dataframe = geopandas.read_parquet( + self.urlpath, + **self._geopandas_kwargs + ) + + class GeoPandasSQLSource(GeoPandasSource): def __init__( self, uri, sql_expr=None, table=None, geopandas_kwargs=None, metadata=None diff --git a/requirements.txt b/requirements.txt index b441ebc..d05e7c3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,2 @@ -intake geopandas -pytest -fsspec +intake diff --git a/setup.py b/setup.py index 53b85a6..06726a6 100644 --- a/setup.py +++ b/setup.py @@ -12,8 +12,8 @@ cmdclass=versioneer.get_cmdclass(), description='Geopandas plugin for Intake', url='https://github.com/intake/intake_geopandas', - maintainer='Jacob Tomlinson', - maintainer_email='jacob.tomlinson@informaticslab.co.uk', + maintainer='Ian Rose', + maintainer_email='ian.r.rose@gmail.com', license='BSD', py_modules=['intake_geopandas'], packages=find_packages(), @@ -21,15 +21,24 @@ entry_points={ 'intake.drivers': [ 'geojson = intake_geopandas.geopandas:GeoJSONSource', + 'geopandasfile = intake_geopandas.geopandas:GeoPandasFileSource', + 'geoparquet = intake_geopandas.geopandas:GeoParquetSource', 'postgis = intake_geopandas.geopandas:PostGISSource', 'shapefile = intake_geopandas.geopandas:ShapefileSource', 'spatialite = intake_geopandas.geopandas:SpatiaLiteSource', 'regionmask = intake_geopandas.regionmask:RegionmaskSource', ] }, + classifiers=[ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + ], + python_requires=">=3.6", include_package_data=True, install_requires=requires, - extras_require={'':['regionmask']}, + extras_require={"arrow": ["pyarrow"], "regionmask":["regionmask"]}, long_description_content_type='text/markdown', long_description=open('README.md').read(), zip_safe=False, diff --git a/tests/data/countries.gpkg b/tests/data/countries.gpkg new file mode 100644 index 0000000..d76f488 Binary files /dev/null and b/tests/data/countries.gpkg differ diff --git a/tests/data/countries.parquet b/tests/data/countries.parquet new file mode 100644 index 0000000..89adffd Binary files /dev/null and b/tests/data/countries.parquet differ diff --git a/tests/test_file_source.py b/tests/test_file_source.py index bd74b32..f98c91d 100644 --- a/tests/test_file_source.py +++ b/tests/test_file_source.py @@ -4,7 +4,8 @@ from pkg_resources import get_distribution, parse_version -from intake_geopandas import GeoJSONSource, ShapefileSource +from intake_geopandas import GeoJSONSource, GeoPandasFileSource, GeoParquetSource, ShapefileSource +from geopandas import GeoDataFrame geom_col_type = ( "object" @@ -30,6 +31,18 @@ def geojson_filenames(): return dict(countries=os.path.join(basedir, "data", "countries.geo.json")) +@pytest.fixture +def gpkg_filename(): + basedir = os.path.dirname(__file__) + return os.path.join(basedir, "data", "countries.gpkg") + + +@pytest.fixture +def geoparquet_filename(): + basedir = os.path.dirname(__file__) + return os.path.join(basedir, "data", "countries.parquet") + + @pytest.fixture def geojson_datasource(geojson_filenames): return GeoJSONSource(geojson_filenames["countries"]) @@ -47,7 +60,7 @@ def test_shape_datasource(shape_datasource): } -def test_countries_datasource(geojson_datasource): +def test_geojson_datasource(geojson_datasource): info = geojson_datasource.discover() geojson_datasource.read() assert info["dtype"] == { @@ -55,3 +68,18 @@ def test_countries_datasource(geojson_datasource): "id": "object", "name": "object", } + + +def test_alternative_ogr_driver(gpkg_filename): + gpkg_datasource = GeoPandasFileSource( + gpkg_filename, + geopandas_kwargs={"driver": "GPKG"}, + ) + gdf = gpkg_datasource.read() + assert isinstance(gdf, GeoDataFrame) + + +def test_geoparquet_source(geoparquet_filename): + datasource = GeoParquetSource(geoparquet_filename) + gdf = datasource.read() + assert isinstance(gdf, GeoDataFrame)