Skip to content

Commit

Permalink
Merge pull request #19 from ian-r-rose/geoparquet
Browse files Browse the repository at this point in the history
Geoparquet
  • Loading branch information
Ian Rose authored Sep 29, 2020
2 parents c606762 + e09eca0 commit e08c89b
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 10 deletions.
1 change: 1 addition & 0 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ test:
- tests
requires:
- aiohttp
- pyarrow
- pytest
- requests
commands:
Expand Down
12 changes: 10 additions & 2 deletions intake_geopandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,23 @@
import intake

from ._version import get_versions
from .geopandas import GeoJSONSource, PostGISSource, ShapefileSource, SpatiaLiteSource
from .geopandas import (
GeoJSONSource,
GeoPandasFileSource,
GeoParquetSource,
PostGISSource,
ShapefileSource,
SpatiaLiteSource,
)
from .regionmask import RegionmaskSource

__version__ = get_versions()['version']
del get_versions


__all__ = [
'GeoJSONSource',
'GeoPandasFileSource',
'GeoParquetSource',
'PostGISSource',
'ShapefileSource',
'SpatiaLiteSource',
Expand Down
28 changes: 28 additions & 0 deletions intake_geopandas/geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ def _close(self):


class GeoPandasFileSource(GeoPandasSource):
name="geopandasfile"

def __init__(
self,
urlpath,
Expand All @@ -67,6 +69,11 @@ def __init__(
metadata=None,
):
"""
A source for a file opened by geopandas. Specializations of this are provided
for shapefiles and geojson, but this base class can also be used directly
for other file types by providing an OGR driver to `geopandas_kwargs`, e.g.
`geopandas_kwargs={"driver": "GPKG"}` to open a geopackage.
Parameters
----------
urlpath : str or iterable, location of data
Expand Down Expand Up @@ -149,6 +156,27 @@ def _resolve_single_file(self, filelist):
)


class GeoParquetSource(GeoPandasFileSource):
    """File source that loads its data with ``geopandas.read_parquet``."""

    name = "geoparquet"

    def _open_dataset(self):
        """
        Load the parquet data and store the result on ``self._dataframe``.

        Without fsspec, ``self.urlpath`` is handed straight to
        ``geopandas.read_parquet``. With fsspec, the path is opened through
        ``fsspec.open_files`` and the opened file is passed instead.
        ``self._geopandas_kwargs`` is forwarded to the reader in both cases.
        """
        if not self._use_fsspec:
            self._dataframe = geopandas.read_parquet(
                self.urlpath, **self._geopandas_kwargs
            )
            return

        with fsspec.open_files(self.urlpath, **self._storage_options) as opened:
            # More than one match is delegated to the inherited
            # _resolve_single_file helper (presumably it narrows the list
            # down to one file -- confirm against the base class).
            if len(opened) > 1:
                target = self._resolve_single_file(opened)
            else:
                target = opened[0]
            self._dataframe = geopandas.read_parquet(
                target, **self._geopandas_kwargs
            )


class GeoPandasSQLSource(GeoPandasSource):
def __init__(
self, uri, sql_expr=None, table=None, geopandas_kwargs=None, metadata=None
Expand Down
4 changes: 1 addition & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
intake
geopandas
pytest
fsspec
intake
15 changes: 12 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,33 @@
cmdclass=versioneer.get_cmdclass(),
description='Geopandas plugin for Intake',
url='https://github.com/intake/intake_geopandas',
maintainer='Jacob Tomlinson',
maintainer_email='jacob.tomlinson@informaticslab.co.uk',
maintainer='Ian Rose',
maintainer_email='ian.r.rose@gmail.com',
license='BSD',
py_modules=['intake_geopandas'],
packages=find_packages(),
package_data={'': ['*.csv', '*.yml', '*.html']},
entry_points={
'intake.drivers': [
'geojson = intake_geopandas.geopandas:GeoJSONSource',
'geopandasfile = intake_geopandas.geopandas:GeoPandasFileSource',
'geoparquet = intake_geopandas.geopandas:GeoParquetSource',
'postgis = intake_geopandas.geopandas:PostGISSource',
'shapefile = intake_geopandas.geopandas:ShapefileSource',
'spatialite = intake_geopandas.geopandas:SpatiaLiteSource',
'regionmask = intake_geopandas.regionmask:RegionmaskSource',
]
},
classifiers=[
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
],
python_requires=">=3.6",
include_package_data=True,
install_requires=requires,
extras_require={'':['regionmask']},
extras_require={"arrow": ["pyarrow"], "regionmask":["regionmask"]},
long_description_content_type='text/markdown',
long_description=open('README.md').read(),
zip_safe=False,
Expand Down
Binary file added tests/data/countries.gpkg
Binary file not shown.
Binary file added tests/data/countries.parquet
Binary file not shown.
32 changes: 30 additions & 2 deletions tests/test_file_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

from pkg_resources import get_distribution, parse_version

from intake_geopandas import GeoJSONSource, ShapefileSource
from intake_geopandas import GeoJSONSource, GeoPandasFileSource, GeoParquetSource, ShapefileSource
from geopandas import GeoDataFrame

geom_col_type = (
"object"
Expand All @@ -30,6 +31,18 @@ def geojson_filenames():
return dict(countries=os.path.join(basedir, "data", "countries.geo.json"))


@pytest.fixture
def gpkg_filename():
    """Path of ``data/countries.gpkg`` relative to this test module's directory."""
    return os.path.join(os.path.dirname(__file__), "data", "countries.gpkg")


@pytest.fixture
def geoparquet_filename():
    """Path of ``data/countries.parquet`` relative to this test module's directory."""
    return os.path.join(os.path.dirname(__file__), "data", "countries.parquet")


@pytest.fixture
def geojson_datasource(geojson_filenames):
    """Build a ``GeoJSONSource`` from the ``"countries"`` entry of ``geojson_filenames``."""
    countries_path = geojson_filenames["countries"]
    return GeoJSONSource(countries_path)
Expand All @@ -47,11 +60,26 @@ def test_shape_datasource(shape_datasource):
}


def test_countries_datasource(geojson_datasource):
def test_geojson_datasource(geojson_datasource):
    """Check the dtypes reported by ``discover()`` and that ``read()`` runs."""
    discovered = geojson_datasource.discover()
    # read() is called only to make sure loading does not raise.
    geojson_datasource.read()
    expected_dtypes = {
        "geometry": geom_col_type,
        "id": "object",
        "name": "object",
    }
    assert discovered["dtype"] == expected_dtypes


def test_alternative_ogr_driver(gpkg_filename):
    """Read a GeoPackage through ``GeoPandasFileSource`` with the ``GPKG`` driver."""
    driver_options = {"driver": "GPKG"}
    source = GeoPandasFileSource(gpkg_filename, geopandas_kwargs=driver_options)
    result = source.read()
    assert isinstance(result, GeoDataFrame)


def test_geoparquet_source(geoparquet_filename):
    """Read the sample parquet file through ``GeoParquetSource``."""
    result = GeoParquetSource(geoparquet_filename).read()
    assert isinstance(result, GeoDataFrame)

0 comments on commit e08c89b

Please sign in to comment.