Skip to content

Commit

Permalink
Merge pull request #22 from aaronspring/AS_intake_regionmask
Browse files Browse the repository at this point in the history
intake_geopandas.regionmask
  • Loading branch information
Ian Rose authored Sep 29, 2020
2 parents d5de6c5 + 49886c4 commit c606762
Show file tree
Hide file tree
Showing 8 changed files with 271 additions and 50 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ script:
- |
flake8 .
if [ "$TRAVIS_OS_NAME" = "linux" ]; then
travis_wait 30 conda build -c defaults -c conda-forge ./conda
travis_wait 40 conda build -c defaults -c conda-forge ./conda
else
# Workaround for Travis-CI bug #2: https://github.com/travis-ci/travis-ci/issues/7773
conda build -c defaults -c conda-forge --no-test ./conda
Expand Down
16 changes: 8 additions & 8 deletions intake_geopandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
# -*- coding: utf-8 -*-
import intake

from ._version import get_versions
from .geopandas import GeoJSONSource, PostGISSource, ShapefileSource, SpatiaLiteSource
from .regionmask import RegionmaskSource

__version__ = get_versions()['version']
del get_versions

import intake
from .geopandas import (
GeoJSONSource,
PostGISSource,
ShapefileSource,
SpatiaLiteSource
)

__all__ = [
'GeoJSONSource',
'PostGISSource',
'ShapefileSource',
'SpatiaLiteSource'
'SpatiaLiteSource',
'RegionmaskSource',
]
62 changes: 33 additions & 29 deletions intake_geopandas/geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
import geopandas
from intake.source.base import DataSource, Schema

from . import __version__
from ._version import get_versions

__version__ = get_versions()['version']
del get_versions


class GeoPandasSource(DataSource, ABC):
"""
Base class intake source for loading GeoDataFrames.
"""

version = __version__
container = 'dataframe'
partition_access = True
Expand All @@ -29,11 +33,13 @@ def _get_schema(self):

dtypes = self._dataframe.dtypes.to_dict()
dtypes = {n: str(t) for (n, t) in dtypes.items()}
return Schema(datashape=None,
dtype=dtypes,
shape=(None, len(dtypes)),
npartitions=1,
extra_metadata={})
return Schema(
datashape=None,
dtype=dtypes,
shape=(None, len(dtypes)),
npartitions=1,
extra_metadata={},
)

def _get_partition(self, i):
self._get_schema()
Expand Down Expand Up @@ -73,7 +79,8 @@ def __init__(
Whether to use fsspec to open `urlpath`. By default, `urlpath` is passed
directly to GeoPandas, which opens the file using `fiona`. However, for some
use cases it may be beneficial to read the file using `fsspec` before
passing the resulting bytes to GeoPandas (e.g., when using `fsspec` caching).
passing the resulting bytes to GeoPandas (e.g., when using `fsspec`
caching).
Note that fiona/GDAL and `fsspec` have mutually-incompatible URL chaining
syntaxes, so the URLs passed to each may be significantly different.
Expand All @@ -90,11 +97,10 @@ def __init__(
"""
self.urlpath = urlpath
self._use_fsspec = use_fsspec
self._storage_options = storage_options or {}
self.storage_options = storage_options or {}
self._bbox = bbox
self._geopandas_kwargs = geopandas_kwargs or {}
self._dataframe = None
self.storage_options = storage_options or {}

super().__init__(metadata=metadata)

Expand All @@ -103,53 +109,50 @@ def _open_dataset(self):
Open dataset using geopandas.
"""
if self._use_fsspec:
with fsspec.open_files(self.urlpath, **self._storage_options) as f:
with fsspec.open_files(self.urlpath, **self.storage_options) as f:
f = self._resolve_single_file(f) if len(f) > 1 else f[0]
self._dataframe = geopandas.read_file(
f,
bbox=self._bbox,
**self._geopandas_kwargs,
f, bbox=self._bbox, **self._geopandas_kwargs,
)
else:
self._dataframe = geopandas.read_file(
self.urlpath,
bbox=self._bbox,
**self._geopandas_kwargs
self.urlpath, bbox=self._bbox, **self._geopandas_kwargs
)

def _resolve_single_file(self, filelist):
"""
Given a list of fsspec OpenFiles, choose one to pass to geopandas.
"""
raise NotImplementedError(
"Opening multiple files is not supported by this driver"
'Opening multiple files is not supported by this driver'
)


class GeoJSONSource(GeoPandasFileSource):
name = "geojson"
name = 'geojson'


class ShapefileSource(GeoPandasFileSource):
name = "shapefile"
name = 'shapefile'

def _resolve_single_file(self, filelist):
"""
Given a list of fsspec OpenFiles, find a .shp file.
"""
local_files = fsspec.open_local(self.urlpath, **self.storage_options)
for f in local_files:
if f.endswith(".shp"):
if f.endswith('.shp'):
return f
raise ValueError(
f"No shapefile found in {filelist}, if you are using fsspec caching"
" consider using same_names=True"
f'No shapefile found in {filelist}, if you are using fsspec caching'
' consider using same_names=True'
)


class GeoPandasSQLSource(GeoPandasSource):
def __init__(self, uri, sql_expr=None, table=None,
geopandas_kwargs=None, metadata=None):
def __init__(
self, uri, sql_expr=None, table=None, geopandas_kwargs=None, metadata=None
):
"""
Parameters
----------
Expand All @@ -168,9 +171,9 @@ def __init__(self, uri, sql_expr=None, table=None,
if sql_expr:
self.sql_expr = sql_expr
elif table:
self.sql_expr = f"SELECT * FROM {table}"
self.sql_expr = f'SELECT * FROM {table}'
else:
raise ValueError("Must provide either a sql_expr or a table")
raise ValueError('Must provide either a sql_expr or a table')

self._geopandas_kwargs = geopandas_kwargs or {}
self._dataframe = None
Expand All @@ -179,12 +182,13 @@ def __init__(self, uri, sql_expr=None, table=None,

def _open_dataset(self):
self._dataframe = geopandas.read_postgis(
self.sql_expr, self.uri, **self._geopandas_kwargs)
self.sql_expr, self.uri, **self._geopandas_kwargs
)


class PostGISSource(GeoPandasSQLSource):
name = "postgis"
name = 'postgis'


class SpatiaLiteSource(GeoPandasSQLSource):
name = "spatialite"
name = 'spatialite'
78 changes: 78 additions & 0 deletions intake_geopandas/regionmask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from intake.source.base import Schema

from .geopandas import GeoPandasFileSource


class RegionmaskSource(GeoPandasFileSource):
    """
    Intake source that reads a vector file with geopandas and converts the
    resulting GeoDataFrame with ``regionmask.from_geopandas``.
    """

    name = 'regionmask'

    def __init__(
        self,
        urlpath,
        use_fsspec=False,
        storage_options=None,
        bbox=None,
        geopandas_kwargs=None,
        metadata=None,
        regionmask_kwargs=None,
    ):
        """
        Parameters
        ----------
        urlpath : str or iterable, location of data
            Either the absolute or relative path to the file or URL to be
            opened. Some examples:
            - ``{{ CATALOG_DIR }}data/states.shp``
            - ``http://some.domain.com/data/states.geo.json``
        use_fsspec: bool
            Whether to use fsspec to open `urlpath`. By default, `urlpath` is passed
            directly to GeoPandas, which opens the file using `fiona`. However, for some
            use cases it may be beneficial to read the file using `fsspec` before
            passing the resulting bytes to GeoPandas (e.g., when using `fsspec`
            caching).
            Note that fiona/GDAL and `fsspec` have mutually-incompatible URL chaining
            syntaxes, so the URLs passed to each may be significantly different.
        storage_options: dict
            Storage options to pass to fsspec when opening. Only used when
            `use_fsspec=True`.
        bbox : tuple | GeoDataFrame or GeoSeries, default None
            Filter features by given bounding box, GeoSeries, or GeoDataFrame.
            CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame.
        geopandas_kwargs : dict
            Any further arguments to pass to geopandas's read_file function.
        metadata : dict
            Passed through unchanged to the parent source's constructor.
        regionmask_kwargs : dict
            Any further arguments to pass to regionmask.from_geopandas.
        """
        self._regionmask_kwargs = regionmask_kwargs or {}

        super().__init__(
            urlpath=urlpath,
            metadata=metadata,
            use_fsspec=use_fsspec,
            storage_options=storage_options,
            geopandas_kwargs=geopandas_kwargs,
            bbox=bbox,
        )

    def _open_dataset(self):
        """
        Load the file via the parent class, then convert it with regionmask.

        Raises
        ------
        ImportError
            If the ``regionmask`` package cannot be imported.
        """
        # Imported here rather than at module level so that importing this
        # module does not itself require regionmask to be installed.
        try:
            import regionmask
        except ImportError as err:
            # Chain explicitly so the original import failure stays visible.
            raise ImportError('please install regionmask') from err

        super()._open_dataset()

        # Record the column dtypes now: after the conversion below,
        # ``self._dataframe`` no longer holds the GeoDataFrame they came from.
        self._dtypes = {
            column: str(dtype)
            for column, dtype in self._dataframe.dtypes.to_dict().items()
        }
        self._dataframe = regionmask.from_geopandas(
            self._dataframe, **self._regionmask_kwargs
        )

    def _get_schema(self):
        """
        Return the intake ``Schema``, opening the dataset first if needed.

        The reported dtypes and column count are those captured from the
        GeoDataFrame in ``_open_dataset`` before the regionmask conversion.
        """
        if self._dataframe is None:
            self._open_dataset()

        return Schema(
            datashape=None,
            dtype=self._dtypes,
            shape=(None, len(self._dtypes)),
            npartitions=1,
            extra_metadata={},
        )
10 changes: 7 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-

from setuptools import setup, find_packages
from setuptools import find_packages, setup

import versioneer

requires = open('requirements.txt').read().strip().split('\n')
Expand All @@ -10,7 +11,7 @@
version=versioneer.get_version(),
cmdclass=versioneer.get_cmdclass(),
description='Geopandas plugin for Intake',
url='https://github.com/informatics-lab/intake_geopandas',
url='https://github.com/intake/intake_geopandas',
maintainer='Jacob Tomlinson',
maintainer_email='jacob.tomlinson@informaticslab.co.uk',
license='BSD',
Expand All @@ -23,10 +24,13 @@
'postgis = intake_geopandas.geopandas:PostGISSource',
'shapefile = intake_geopandas.geopandas:ShapefileSource',
'spatialite = intake_geopandas.geopandas:SpatiaLiteSource',
'regionmask = intake_geopandas.regionmask:RegionmaskSource',
]
},
include_package_data=True,
install_requires=requires,
extras_require={'':['regionmask']},
long_description_content_type='text/markdown',
long_description=open('README.md').read(),
zip_safe=False, )
zip_safe=False,
)
38 changes: 34 additions & 4 deletions tests/data/shape.catalog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,39 @@ sources:
urlpath: '{CATALOG_PATH}/stations/stations.shp'

MEOW:
description: MEOW
driver: intake_geopandas.geopandas.ShapefileSource
args:
urlpath: http://maps.tnc.org/files/shp/MEOW-TNC.zip
description: MEOW
driver: intake_geopandas.geopandas.ShapefileSource
args:
urlpath: http://maps.tnc.org/files/shp/MEOW-TNC.zip

MEOW_regionmask:
description: MEOW for regionmask
driver: intake_geopandas.regionmask.RegionmaskSource
args:
urlpath: http://maps.tnc.org/files/shp/MEOW-TNC.zip
regionmask_kwargs:
names: ECOREGION
abbrevs: _from_name
source: http://maps.tnc.org
numbers: ECO_CODE_X
name: MEOW

MEOW_regionmask_cache:
description: MEOW for regionmask and cache
driver: intake_geopandas.regionmask.RegionmaskSource
args:
urlpath: simplecache::http://maps.tnc.org/files/shp/MEOW-TNC.zip
use_fsspec: true
storage_options:
simplecache:
same_names: true
cache_storage: tmp/intake_geopandas
regionmask_kwargs:
names: ECOREGION
abbrevs: _from_name
source: http://maps.tnc.org
numbers: ECO_CODE_X
name: MEOW


MEOW_simplecache:
Expand All @@ -32,6 +61,7 @@ sources:
driver: intake_geopandas.geopandas.ShapefileSource
args:
urlpath: simplecache::zip://gadm36_ALA_0*::https://biogeo.ucdavis.edu/data/gadm3.6/shp/gadm36_ALA_shp.zip
use_fsspec: true
storage_options:
simplecache:
same_names: true
Expand Down
Loading

0 comments on commit c606762

Please sign in to comment.