diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index ab386cad..bd5bc3be 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -3,5 +3,5 @@ Contributing Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given. -Please read the Birdhouse `Developer Guide `_ -and the `Raven Documentation `_ to get started. +Please read the Birdhouse `Developer Guide `_ +and the `Raven Documentation `_ to get started. diff --git a/Dockerfile b/Dockerfile index fde36aa7..ba8554f7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ WORKDIR /code # Create conda environment COPY environment.yml . -RUN mamba env create -n raven -f environment.yml && mamba install -n raven gunicorn && mamba clean --all --yes +RUN mamba env create -n raven -f environment.yml && mamba install -n raven gunicorn && mamba clean --all --yes # Add the raven conda environment to the path ENV PATH=/opt/conda/envs/raven/bin:$PATH @@ -20,7 +20,7 @@ COPY . /code # Install raven RUN pip install . --no-deps -# Start WPS service on port 9099 on 0.0.0.0 +# Start WPS service on port 9099 EXPOSE 9099 CMD ["gunicorn", "--bind=0.0.0.0:9099", "raven.wsgi:application"] diff --git a/Makefile b/Makefile index f7c6d0e9..b07904a3 100644 --- a/Makefile +++ b/Makefile @@ -7,19 +7,23 @@ APP_NAME := raven-wps OS := $(shell uname) -WPS_PORT := 9099 -WPS_URL := http://0.0.0.0:$(WPS_PORT) +WPS_PORT ?= 9099 +WPS_URL ?= http://0.0.0.0:$(WPS_PORT) # If WPS_URL is overridden, this should also be overridden to match. 
-WPS_OUTPUT_URL := http://localhost:$(WPS_PORT)/outputs +WPS_OUTPUT_URL ?= http://localhost:$(WPS_PORT)/outputs -# This will only work on Linux (not macOS/homebrew GDAL) -GDAL_VERSION := $(shell gdal-config --version) +# This will only work for Linux and macOS/homebrew +ifeq ($(OS),Linux) + GDAL_VERSION := $(shell gdal-config --version) +else ifeq ($(OS),Darwin) + GDAL_VERSION := $(shell gdalinfo --version | awk '{print $$2}' | sed 's/.$$//') +endif # Used in target refresh-notebooks to make it looks like the notebooks have # been refreshed from the production server below instead of from the local dev # instance so the notebooks can also be used as tutorial notebooks. -OUTPUT_URL = https://pavics.ouranos.ca/wpsoutputs/raven +OUTPUT_URL ?= https://pavics.ouranos.ca/wpsoutputs/raven SANITIZE_FILE := https://github.com/Ouranosinc/PAVICS-e2e-workflow-tests/raw/master/notebooks/output-sanitize.cfg diff --git a/docker-compose.yml b/docker-compose.yml index 497eb5cd..819dc137 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,4 +28,4 @@ services: - ./:/opt/wps environment: - RAVENPY_TESTDATA_PATH=/testdata - command: bash -c "source activate wps && pytest -x" + command: bash -c "python -m pip install pytest && pytest -x" diff --git a/docs/source/dev_guide.rst b/docs/source/dev_guide.rst index 9f31c6a9..c4fdc520 100644 --- a/docs/source/dev_guide.rst +++ b/docs/source/dev_guide.rst @@ -9,19 +9,17 @@ Developer Guide .. WARNING:: To create new processes, look at examples in Emu_. - Re-create a fresh environment ----------------------------- -.. code-block:: console - - make stop # in case you previously did 'make start' - conda deactivate # exit the current 'raven' conda env so we can destroy it - conda env remove -n raven # destroy the current conda env to recreate one from scratch - conda env create -f environment.yml - conda activate raven - make develop # install raven-wps and additional dev tools +.. 
code-block:: shell + make stop # in case you previously did 'make start' + conda deactivate # exit the current 'raven' conda env so we can destroy it + conda env remove -n raven # destroy the current conda env to recreate one from scratch + conda env create -f environment.yml + conda activate raven + make develop # install raven-wps and additional dev tools Building the docs ----------------- @@ -49,10 +47,7 @@ First activate the ``raven`` Conda environment and install ``pytest``. .. code-block:: console - $ source activate raven - $ pip install -r requirements_dev.txt # if not already installed - OR - $ make develop + make develop Run quick tests (skip slow and online): @@ -87,8 +82,7 @@ Do the same as above using the ``Makefile``. Running notebooks tests ----------------------- -Assuming that the ``raven`` conda env has already been created and is up-to-date and -raven-wps has been installed with ``make develop``: +Assuming that the ``raven`` conda env has already been created and is up-to-date and raven-wps has been installed with ``make develop``: .. code-block:: console @@ -98,7 +92,7 @@ raven-wps has been installed with ``make develop``: # to test all notebooks make test-notebooks - Or: + Or, alternatively: .. code-block:: console @@ -106,20 +100,20 @@ raven-wps has been installed with ``make develop``: make docs/source/notebooks/Subset_climate_data_over_watershed.ipynb.run -The notebooks may also require other WPS services (Finch and Flyingpigeon). +The notebooks may also require other WPS services (`finch`). By default these are from the production server but we can point the notebooks to local servers if needed for development purposes: .. code-block:: console # to test all notebooks - make FLYINGPIGEON_WPS_URL=http://localhost:8093 FINCH_WPS_URL=http://localhost:5000 test-notebooks + make FINCH_WPS_URL=http://localhost:5000 test-notebooks -Or: + Or, alternatively: .. 
code-block:: console # to test a single notebook (note the .run at the end of the notebook path) - make FLYINGPIGEON_WPS_URL=http://localhost:8093 FINCH_WPS_URL=http://localhost:5000 docs/source/notebooks/Subset_climate_data_over_watershed.ipynb.run + make FINCH_WPS_URL=http://localhost:5000 docs/source/notebooks/Subset_climate_data_over_watershed.ipynb.run If instead we want to run the notebooks against the production raven-wps server or any other raven-wps servers: @@ -128,20 +122,19 @@ If instead we want to run the notebooks against the production raven-wps server # to test all notebooks make WPS_URL=https://pavics.ouranos.ca/twitcher/ows/proxy/raven/wps test-notebooks -Or: + Or, alternatively: .. code-block:: console # to test a single notebook (note the .run at the end of the notebook path) make WPS_URL=https://pavics.ouranos.ca/twitcher/ows/proxy/raven/wps docs/source/notebooks/Subset_climate_data_over_watershed.ipynb.run -We can also override all three of the server variables (WPS_URL, FINCH_WPS_URL, FLYINGPIGEON_WPS_URL) to pick and choose any servers/services from anywhere we want. +We can also override both of the server variables (``WPS_URL``, ``FINCH_WPS_URL``) to pick and choose any servers/services from anywhere we want. Starting local Jupyter server to edit/develop notebooks ------------------------------------------------------- -Assuming that the ``raven`` conda env has already been created and is up-to-date and -raven-wps has been installed with ``make develop``: +Assuming that the ``raven`` conda env has already been created and is up-to-date and raven-wps has been installed with ``make develop``: .. 
code-block:: console @@ -151,23 +144,19 @@ raven-wps has been installed with ``make develop``: # to start local jupyter notebook server listing all current notebooks make notebook # Control-C to terminate once done - # Can also override all three WPS_URL, FINCH_WPS_URL and FLYINGPIGEON_WPS_URL here as well, + # Can also override WPS_URL and FINCH_WPS_URL here as well, # just like 'make test-notebooks' to be able to pick and choose any servers anywhere we want. # By overriding these variables at the 'make notebook' step, we will not need to # override them one by one in each notebook as each notebook will also look # for those variables as environment variables. - Bulk refresh all notebooks output --------------------------------- -This automated refresh only works for notebooks that passed ``make -test-notebooks`` above. For those that failed, manually starting a local -Jupyter server and refresh them manually. +This automated refresh only works for notebooks that passed ``make test-notebooks`` above. For those that failed, manually start a local Jupyter server and refresh them. -Assuming that the ``raven`` conda env has already been created and is up-to-date and -raven-wps has been installed with ``make develop``: +Assuming that the ``raven`` conda env has already been created and is up-to-date and raven-wps has been installed with ``make develop``: .. code-block:: console @@ -177,17 +166,16 @@ raven-wps has been installed with ``make develop``: # to refresh all notebooks make refresh-notebooks -Or: + Or, alternatively: .. 
code-block:: console # to refresh a single notebook (note the .refresh at the end of the notebook path) make docs/source/notebooks/Assess_probabilistic_flood_risk.ipynb.refresh - # Can also override all three of the server variables (WPS_URL, FINCH_WPS_URL and FLYINGPIGEON_WPS_URL) here as well, + # Can also override the server variables (WPS_URL, FINCH_WPS_URL) here as well, # just like 'make test-notebooks' to be able to pick and choose any servers/services from anywhere we want. - Prepare a release ----------------- @@ -195,7 +183,7 @@ Update the Conda specification file to build identical environments_ on a specif .. note:: You should run this on your target OS, in our case Linux. -.. code-block:: console +.. code-block:: shell conda env create -f environment.yml source activate raven @@ -205,7 +193,6 @@ Update the Conda specification file to build identical environments_ on a specif .. _environments: https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#building-identical-conda-environments - Bump a new version ------------------ diff --git a/environment-dev.yml b/environment-dev.yml index fd4e8ed0..00a9d195 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -5,19 +5,20 @@ channels: dependencies: - python >=3.9,<3.13 - flit >=3.8,<4.0 - - birdy + - birdy >=0.8.6 - black >=24.10.0 - bump-my-version >=0.26.0 - flake8 >=7.0 - - flake8-rst-docstrings + - flake8-rst-docstrings >=0.3.0 - ipykernel - ipywidgets - isort >=5.13.2 - - nbconvert - - nbval - - pip >=23.3.0 + - nbconvert >=7.13.0 + - nbval >=0.10.0 + - pip >=24.2.0 - pre-commit >=3.6.0 - - pymetalink + - pymetalink >=6.5.2 - pytest >=8.0.0 - pytest-xdist >=3.2 - - python-build + - python-build >=1.0 + - yamllint >=1.33.0 diff --git a/environment.yml b/environment.yml index 0be32887..a7d425db 100644 --- a/environment.yml +++ b/environment.yml @@ -5,31 +5,30 @@ channels: dependencies: - python >=3.9,<3.13 - pywps >=4.6 - - affine - - anyascii + - affine >=2.4.0 + - 
anyascii >=0.3.2 - cartopy >=0.23.0 - click >=8.1.7 - fiona >=1.9 - geopandas >=0.12.0 - - geojson + - geojson >=3.1.0 - gdal >=3.0 - jinja2 >=3.1.4 - - matplotlib - - nbval - - netcdf4 + - matplotlib-base >=3.6.0 + - nbval >=0.10.0 + - netcdf4 >=1.6.5 - numpy >=1.23.0 - - owslib + - owslib >=0.29.0 - pandas >=2.2 - psutil >=6.0.0 - psycopg2 - - pymetalink + - pymetalink >=6.5.2 - pyogrio >=0.7.2 - pyproj >=3.4 - - rasterio - - rasterstats - - requests - - rioxarray + - rasterio >=1.4.0 + - rasterstats >=0.20.0 + - requests >=2.31.0 + - rioxarray >=0.15.0 - shapely >=2.0 - - urlpath - xarray >=2023.11.0 - xclim >=0.48.2 diff --git a/pyproject.toml b/pyproject.toml index 7df4d4a5..57efe8ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,76 +21,73 @@ classifiers = [ "Intended Audience :: Developers", "Intended Audience :: Education", "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", "Operating System :: MacOS :: MacOS X", "Operating System :: POSIX :: Linux", "Operating System :: Unix", - "Programming Language :: Python", - "Natural Language :: English", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python", "Topic :: Scientific/Engineering :: Atmospheric Science", "Topic :: Scientific/Engineering :: GIS", - "Topic :: Scientific/Engineering :: Hydrology", - "License :: OSI Approved :: MIT License" + "Topic :: Scientific/Engineering :: Hydrology" ] dynamic = ["description", "version"] dependencies = [ - "anyascii", + "anyascii >=0.3.2", "click >=8.1.7", "jinja2 >=3.1.4", - "matplotlib", - "netCDF4", + "matplotlib >=3.6.0", + "netCDF4 >=1.6.5", "numpy >=1.23.0", - "owslib", + "owslib >=0.29.0", "pandas >=2.2", "psutil >=6.0.0", "pywps >=4.6", - "requests", + "requests >=2.31.0", "xarray >=2023.11.0", "xclim 
>=0.48.2", # GIS libraries - "affine", + "affine >=2.4.0", "cartopy >=0.23.0", "fiona >=1.9.0", - "geojson", + "geojson >=3.1.0", "geopandas >=0.12.0", "pyogrio >=0.7.2", "pyproj >=3.4", - "rasterio", - "rasterstats", - "requests", - "rioxarray", + "rasterio >=1.4.0", + "rasterstats >=0.20.0", + "rioxarray >=0.15.0", "shapely >=2.0", # Docs and other utilities - "fsspec", "psycopg2", # to use postgres to log pywps requests like in Prod - "pymetalink" + "pymetalink >=6.5.2" ] [project.optional-dependencies] dev = [ # Dev tools and testing - "birdhouse-birdy", + "birdhouse-birdy >=0.8.6", "black >=24.10.0", - "build", + "build >=1.0", "bump-my-version >=0.26.0", "flake8 >=7.0.0", - "flake8-alphabetize", - "flake8-rst-docstrings", - "flit >=3.8,<4.0", + "flake8-rst-docstrings >=0.3.0", + "flit >=3.9,<4.0", "ipykernel", "isort >=5.13.2", - "nbconvert", - "nbval", + "nbconvert >=7.13.0", + "nbval >=0.10.0", "nc-time-axis", - "pip >=23.3.0", + "pip >=24.2.0", "pre-commit >=3.6.0", "pytest >=8.0.0", "pytest-xdist >=3.2", - "yamllint" + "yamllint >=1.33.0" ] docs = [ "ipyleaflet", @@ -109,11 +106,11 @@ docs = [ raven-wps = "raven.cli:cli" [project.urls] -"Homepage" = "https://pavics-raven.readthedocs.io/" +"Homepage" = "https://pavics-sdi.readthedocs.io/projects/raven/en/latest/" "Source" = "https://github.com/Ouranosinc/raven/" "Changelog" = "https://pavics-sdi.readthedocs.io/projects/raven/en/latest/changes.html" "Issue tracker" = "https://github.com/Ouranosinc/raven/issues" -"About Ouranos" = "https://www.ouranos.ca/en/" +"About Ouranos" = "https://ouranos.ca/en/" [tool] @@ -122,7 +119,8 @@ target-version = [ "py39", "py310", "py311", - "py312" + "py312", + "py313" ] [tool.bumpversion] diff --git a/src/raven/utilities/analysis.py b/src/raven/utilities/analysis.py index 541f6bed..55e1b92a 100644 --- a/src/raven/utilities/analysis.py +++ b/src/raven/utilities/analysis.py @@ -18,7 +18,8 @@ def geom_prop(geom: Union[Polygon, MultiPolygon, GeometryCollection]) -> dict: - 
"""Return a dictionary of geometry properties. + """ + Return a dictionary of geometry properties. Parameters ---------- @@ -58,7 +59,8 @@ def dem_prop( geom: Union[Polygon, MultiPolygon, list[Union[Polygon, MultiPolygon]]] = None, directory: Union[str, Path] = None, ) -> dict[str, float]: - """Return raster properties for each geometry. + """ + Return raster properties for each geometry. This @@ -120,14 +122,15 @@ def gdal_slope_analysis( set_output: Optional[Union[str, Path]] = None, units: str = "degree", ) -> np.ndarray: - """Return the slope of the terrain from the DEM. + """ + Return the slope of the terrain from the DEM. The slope is the magnitude of the gradient of the elevation. Parameters ---------- dem : str or Path - Path to file storing DEM. + The path to file storing DEM. set_output : str or Path, optional If set to a valid filepath, will write to this path, otherwise will use an in-memory gdal.Dataset. units : str @@ -178,14 +181,15 @@ def gdal_aspect_analysis( set_output: Union[str, Path, bool] = False, flat_values_are_zero: bool = False, ) -> Union[np.ndarray, Dataset]: - """Return the aspect of the terrain from the DEM. + """ + Return the aspect of the terrain from the DEM. The aspect is the compass direction of the steepest slope (0: North, 90: East, 180: South, 270: West). Parameters ---------- dem : str or Path - Path to file storing DEM. + The path to file storing DEM. set_output : str or Path or bool If set to a valid filepath, will write to this path, otherwise will use an in-memory gdal.Dataset. flat_values_are_zero : bool @@ -233,12 +237,13 @@ def gdal_aspect_analysis( def circular_mean_aspect(angles: np.ndarray) -> np.ndarray: - """Return the mean angular aspect based on circular arithmetic approach. + """ + Return the mean angular aspect based on circular arithmetic approach. Parameters ---------- - angles: np.ndarray - Array of aspect angles + angles : np.ndarray + Array of aspect angles. 
Returns ------- diff --git a/src/raven/utilities/checks.py b/src/raven/utilities/checks.py index 6f6c7bd1..28b69eaa 100644 --- a/src/raven/utilities/checks.py +++ b/src/raven/utilities/checks.py @@ -19,7 +19,10 @@ def single_file_check(file_list: Sequence[Union[str, Path]]) -> Any: - """Return the first element of a file list. Raise an error if the list is empty or contains more than one element. + """ + Return the first element of a file list. + + Raise an error if the list is empty or contains more than one element. Parameters ---------- @@ -46,7 +49,10 @@ def boundary_check( max_y: Union[int, float] = 60, min_y: Union[int, float] = -60, ) -> None: - r"""Verify that boundaries do not exceed specific latitudes for geographic coordinate data. Emit a UserWarning if so. + r""" + Verify that boundaries do not exceed specific latitudes for geographic coordinate data. + + Emit a UserWarning if so. Parameters ---------- @@ -97,7 +103,8 @@ def boundary_check( def multipolygon_check(geom: GeometryCollection) -> None: - """Perform a check to verify a geometry is a MultiPolygon + """ + Perform a check to verify a geometry is a MultiPolygon. Parameters ---------- @@ -123,7 +130,8 @@ def feature_contains( point: Union[tuple[Union[int, float, str], Union[str, float, int]], Point], shp: Union[str, Path, list[Union[str, Path]]], ) -> Union[dict, bool]: - """Return the first feature containing a location. + """ + Return the first feature containing a location. Parameters ---------- diff --git a/src/raven/utilities/geo.py b/src/raven/utilities/geo.py index cf59e1c7..1f50a592 100644 --- a/src/raven/utilities/geo.py +++ b/src/raven/utilities/geo.py @@ -34,7 +34,8 @@ def geom_transform( source_crs: Union[str, int, CRS] = WGS84, target_crs: Union[str, int, CRS] = None, ) -> GeometryCollection: - """Change the projection of a geometry. + """ + Change the projection of a geometry. Assuming a geometry's coordinates are in a `source_crs`, compute the new coordinates under the `target_crs`. 
@@ -86,7 +87,8 @@ def generic_raster_clip( padded: bool = True, raster_compression: str = RASTERIO_TIFF_COMPRESSION, ) -> None: - """Crop a raster file to a given geometry. + """ + Crop a raster file to a given geometry. Parameters ---------- @@ -98,9 +100,9 @@ def generic_raster_clip( Geometry defining the region to crop. touches : bool Whether to include cells that intersect the geometry or not. Default: True. - fill_with_nodata: bool + fill_with_nodata : bool Whether to keep pixel values for regions outside of shape or set as nodata or not. Default: True. - padded: bool + padded : bool Whether to add a half-pixel buffer to shape before masking or not. Default: True. raster_compression : str Level of data compression. Default: 'lzw'. @@ -153,7 +155,7 @@ def generic_raster_warp( Path to output raster. target_crs : str or dict Target projection identifier. - raster_compression: str + raster_compression : str Level of data compression. Default: 'lzw'. Returns @@ -198,7 +200,7 @@ def generic_vector_reproject( ---------- vector : Union[str, Path] Path to a file containing a valid vector layer. - projected: Union[str, Path] + projected : Union[str, Path] Path to a file to be written. source_crs : Union[str, dict, CRS] Projection identifier (proj4) for the source geometry, Default: '+proj=longlat +datum=WGS84 +no_defs'. diff --git a/src/raven/utilities/geoserver.py b/src/raven/utilities/geoserver.py index 3fdaf7b1..af77e31d 100644 --- a/src/raven/utilities/geoserver.py +++ b/src/raven/utilities/geoserver.py @@ -1,12 +1,12 @@ -""" -GeoServer interaction operations. +"""GeoServer interaction operations. Working assumptions for this module: * Point coordinates are passed as shapely.geometry.Point instances. * BBox coordinates are passed as (lon1, lat1, lon2, lat2). * Shapes (polygons) are passed as shapely.geometry.shape parsable objects. * All functions that require a CRS have a CRS argument with a default set to WGS84. 
-* GEO_URL points to the GeoServer instance hosting all files. +* GEOSERVER_URL points to the GeoServer instance hosting all files. +* For legacy reasons, we also accept the `GEO_URL` environment variable. TODO: Refactor to remove functions that are just 2-lines of code. For example, many function's logic essentially consists in creating the layer name. @@ -40,11 +40,18 @@ Intersects = None wfs_Point = None -# Do not remove the trailing / otherwise `urljoin` will remove the geoserver path. -# Can be set at runtime with `$ env GEO_URL=https://xx.yy.zz/geoserver/ ...`. -GEO_URL = os.getenv("GEO_URL", "https://pavics.ouranos.ca/geoserver/") +from .geo import determine_upstream_ids + +# Can be set at runtime with `$ env RAVENPY_GEOSERVER_URL=https://xx.yy.zz/geoserver/ ...`. +# For legacy reasons, we also accept the `GEO_URL` environment variable. +GEOSERVER_URL = os.getenv( + "RAVENPY_GEOSERVER_URL", + os.getenv("GEO_URL", "https://pavics.ouranos.ca/geoserver/"), +) +if not GEOSERVER_URL.endswith("/"): + GEOSERVER_URL = f"{GEOSERVER_URL}/" -# We store the contour of different hydrobasins domains +# We store the contour of different HydroBASINS domains hybas_dir = Path(__file__).parent.parent / "data" / "hydrobasins_domains" hybas_pat = "hybas_lake_{domain}_lev01_v1c.zip" @@ -53,6 +60,15 @@ hybas_domains = {dom: hybas_dir / hybas_pat.format(domain=dom) for dom in hybas_regions} +def _fix_server_url(server_url: str) -> str: + if not server_url.endswith("/"): + warnings.warn( + "The GeoServer url should end with a slash. Appending it to the url." + ) + return f"{server_url}/" + return server_url + + def _get_location_wfs( bbox: Optional[ tuple[ @@ -69,12 +85,13 @@ def _get_location_wfs( ] ] = None, layer: str = None, - geoserver: str = GEO_URL, + geoserver: str = GEOSERVER_URL, ) -> dict: - """Return leveled features from a hosted data set using bounding box coordinates and WFS 1.1.0 protocol. 
+ """ + Return leveled features from a hosted data set using bounding box coordinates and WFS 1.1.0 protocol. - For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based - on projected coordinate system (Easting, Northing) boundaries. + For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. + If not geographic, subsetting based on projected coordinate system (Easting, Northing) boundaries. Parameters ---------- @@ -84,7 +101,7 @@ def _get_location_wfs( Geographic coordinates of an intersecting point (lon, lat). layer : str The WFS/WMS layer name requested. - geoserver: str + geoserver : str The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. Returns @@ -92,6 +109,8 @@ def _get_location_wfs( dict A GeoJSON-derived dictionary of vector features (FeatureCollection). """ + geoserver = _fix_server_url(geoserver) + wfs = WebFeatureService(url=urljoin(geoserver, "wfs"), version="2.0.0", timeout=30) if bbox and point: @@ -127,9 +146,10 @@ def _get_location_wfs( def _get_feature_attributes_wfs( attribute: Sequence[str], layer: str = None, - geoserver: str = GEO_URL, + geoserver: str = GEOSERVER_URL, ) -> str: - """Return WFS GetFeature URL request for attribute values. + """ + Return WFS GetFeature URL request for attribute values. Making this request will return a JSON response. @@ -139,7 +159,7 @@ def _get_feature_attributes_wfs( Attribute/field names. layer : str Name of geographic layer queried. - geoserver: str + geoserver : str The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. Returns @@ -151,6 +171,8 @@ def _get_feature_attributes_wfs( ----- Non-existent attributes will raise a cryptic DriverError from fiona. 
""" + geoserver = _fix_server_url(geoserver) + params = dict( service="WFS", version="2.0.0", @@ -167,15 +189,16 @@ def _filter_feature_attributes_wfs( attribute: str, value: Union[str, float, int], layer: str, - geoserver: str = GEO_URL, + geoserver: str = GEOSERVER_URL, ) -> str: - """Return WFS GetFeature URL request filtering geographic features based on a property's value. + """ + Return WFS GetFeature URL request filtering geographic features based on a property's value. Parameters ---------- attribute : str Attribute/field name. - value: Union[str, float, int] + value : Union[str, float, int] Value for attribute queried. layer : str Name of geographic layer queried. @@ -187,6 +210,7 @@ def _filter_feature_attributes_wfs( str WFS request URL. """ + geoserver = _fix_server_url(geoserver) try: attribute = str(attribute) @@ -209,73 +233,14 @@ def _filter_feature_attributes_wfs( return Request("GET", url=urljoin(geoserver, "wfs"), params=params).prepare().url -def _determine_upstream_ids( - fid: str, - df: pd.DataFrame, - *, - basin_field: str, - downstream_field: str, - basin_family: Optional[str] = None, -) -> pd.DataFrame: - """Return a list of upstream features by evaluating the downstream networks. - - Parameters - ---------- - fid : str - feature ID of the downstream feature of interest. - df : pd.DataFrame - A Dataframe comprising the watershed attributes. - basin_field : str - The field used to determine the id of the basin according to hydro project. - downstream_field : str - The field identifying the downstream sub-basin for the hydro project. - basin_family : str, optional - Regional watershed code (For HydroBASINS dataset). - - Returns - ------- - pd.DataFrame - Basins ids including `fid` and its upstream contributors. - """ - - def upstream_ids(bdf, bid): - return bdf[bdf[downstream_field] == bid][basin_field] - - # Note: Hydro Routing `SubId` is a float for some reason and Python float != GeoServer double. Cast them to int. 
- if isinstance(fid, float): - fid = int(fid) - df[basin_field] = df[basin_field].astype(int) - df[downstream_field] = df[downstream_field].astype(int) - - # Locate the downstream feature - ds = df.set_index(basin_field).loc[fid] - if basin_family is not None: - # Do a first selection on the main basin ID of the downstream feature. - sub = df[df[basin_family] == ds[basin_family]] - else: - sub = None - - # Find upstream basins - up = [fid] - for b in up: - tmp = upstream_ids(sub if sub is not None else df, b) - if len(tmp): - up.extend(tmp) - - return ( - sub[sub[basin_field].isin(up)] - if sub is not None - else df[df[basin_field].isin(up)] - ) - - def get_raster_wcs( coordinates: Union[Iterable, Sequence[Union[float, str]]], geographic: bool = True, layer: str = None, - geoserver: str = GEO_URL, + geoserver: str = GEOSERVER_URL, ) -> bytes: - """Return a subset of a raster image from the local GeoServer via WCS 2.0.1 protocol. + """ + Return a subset of a raster image from the local GeoServer via WCS 2.0.1 protocol. For geographic raster grids, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based on projected coordinate system (Easting, Northing) boundaries. @@ -296,6 +261,8 @@ def get_raster_wcs( bytes A GeoTIFF array. """ + geoserver = _fix_server_url(geoserver) + (left, down, right, up) = coordinates if geographic: @@ -332,7 +299,8 @@ def get_raster_wcs( def hydrobasins_upstream(feature: dict, domain: str) -> pd.DataFrame: - """Return a list of hydrobasins features located upstream. + """ + Return a list of hydrobasins features located upstream. 
Parameters ---------- @@ -363,7 +331,7 @@ def hydrobasins_upstream(feature: dict, domain: str) -> pd.DataFrame: df = gpd.read_file(filename=req, engine="pyogrio") # Filter upstream watersheds - return _determine_upstream_ids( + return determine_upstream_ids( fid=feature[basin_field], df=df, basin_field=basin_field, @@ -372,7 +340,8 @@ def hydrobasins_upstream(feature: dict, domain: str) -> pd.DataFrame: def hydrobasins_aggregate(gdf: pd.DataFrame) -> pd.DataFrame: - """Aggregate multiple HydroBASINS watersheds into a single geometry. + """ + Aggregate multiple HydroBASINS watersheds into a single geometry. Parameters ---------- @@ -408,7 +377,8 @@ def select_hybas_domain( ] = None, point: Optional[tuple[Union[int, float], Union[int, float]]] = None, ) -> str: - """Provided a given coordinate or boundary box, return the domain name of the geographic region the coordinate is located within. + """ + Provided a given coordinate or boundary box, return the domain name of the geographic region the coordinate is located within. Parameters ---------- @@ -441,9 +411,10 @@ def filter_hydrobasins_attributes_wfs( attribute: str, value: Union[str, float, int], domain: str, - geoserver: str = GEO_URL, + geoserver: str = GEOSERVER_URL, ) -> str: - """Return a URL that formats and returns a remote GetFeatures request from the USGS HydroBASINS dataset. + """ + Return a URL that formats and returns a remote GetFeatures request from the USGS HydroBASINS dataset. For geographic raster grids, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based on projected coordinate system (Easting, Northing) boundaries. @@ -464,6 +435,8 @@ def filter_hydrobasins_attributes_wfs( str URL to the GeoJSON-encoded WFS response. 
""" + geoserver = _fix_server_url(geoserver) + lakes = True level = 12 @@ -481,9 +454,10 @@ def get_hydrobasins_location_wfs( Union[str, float, int], ], domain: str = None, - geoserver: str = GEO_URL, -) -> str: - """Return features from the USGS HydroBASINS data set using bounding box coordinates. + geoserver: str = GEOSERVER_URL, +) -> dict: + """ + Return features from the USGS HydroBASINS data set using bounding box coordinates. For geographic raster grids, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based on projected coordinate system (Easting, Northing) boundaries. @@ -499,10 +473,11 @@ def get_hydrobasins_location_wfs( Returns ------- - str + dict A GeoJSON-encoded vector feature. - """ + geoserver = _fix_server_url(geoserver) + lakes = True level = 12 layer = f"public:USGS_HydroBASINS_{'lake_' if lakes else ''}{domain}_lev{str(level).zfill(2)}" @@ -521,9 +496,10 @@ def hydro_routing_upstream( fid: Union[str, float, int], level: int = 12, lakes: str = "1km", - geoserver: str = GEO_URL, + geoserver: str = GEOSERVER_URL, ) -> pd.Series: - """Return a list of hydro routing features located upstream. + """ + Return a list of hydro routing features located upstream. Parameters ---------- @@ -541,6 +517,8 @@ def hydro_routing_upstream( pd.Series Basins ids including `fid` and its upstream contributors. 
""" + geoserver = _fix_server_url(geoserver) + wfs = WebFeatureService(url=urljoin(geoserver, "wfs"), version="2.0.0", timeout=30) layer = f"public:routing_{lakes}Lakes_{str(level).zfill(2)}" @@ -553,7 +531,7 @@ def hydro_routing_upstream( df = gpd.read_file(resp) # Identify upstream features - df_upstream = _determine_upstream_ids( + df_upstream = determine_upstream_ids( fid=fid, df=df, basin_field="SubId", @@ -574,12 +552,13 @@ def get_hydro_routing_attributes_wfs( attribute: Sequence[str], level: int = 12, lakes: str = "1km", - geoserver: str = GEO_URL, + geoserver: str = GEOSERVER_URL, ) -> str: - """Return a URL that formats and returns a remote GetFeatures request from hydro routing dataset. + """ + Return a URL that formats and returns a remote GetFeatures request from hydro routing dataset. - For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based - on projected coordinate system (Easting, Northing) boundaries. + For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. + If not geographic, subsetting based on projected coordinate system (Easting, Northing) boundaries. Parameters ---------- @@ -589,15 +568,16 @@ def get_hydro_routing_attributes_wfs( Level of granularity requested for the lakes vector (range(7,13)). Default: 12. lakes : {"1km", "all"} Query the version of dataset with lakes under 1km in width removed ("1km") or return all lakes ("all"). - geoserver: str + geoserver : str The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. Returns ------- str URL to the GeoJSON-encoded WFS response. 
- """ + geoserver = _fix_server_url(geoserver) + layer = f"public:routing_{lakes}Lakes_{str(level).zfill(2)}" return _get_feature_attributes_wfs( attribute=attribute, layer=layer, geoserver=geoserver @@ -609,12 +589,13 @@ def filter_hydro_routing_attributes_wfs( value: Union[str, float, int] = None, level: int = 12, lakes: str = "1km", - geoserver: str = GEO_URL, + geoserver: str = GEOSERVER_URL, ) -> str: - """Return a URL that formats and returns a remote GetFeatures request from hydro routing dataset. + """ + Return a URL that formats and returns a remote GetFeatures request from hydro routing dataset. - For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based - on projected coordinate system (Easting, Northing) boundaries. + For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. + If not geographic, subsetting based on projected coordinate system (Easting, Northing) boundaries. Parameters ---------- @@ -633,8 +614,9 @@ def filter_hydro_routing_attributes_wfs( ------- str URL to the GeoJSON-encoded WFS response. - """ + geoserver = _fix_server_url(geoserver) + layer = f"public:routing_{lakes}Lakes_{str(level).zfill(2)}" return _filter_feature_attributes_wfs( attribute=attribute, value=value, layer=layer, geoserver=geoserver @@ -648,12 +630,13 @@ def get_hydro_routing_location_wfs( ], lakes: str, level: int = 12, - geoserver: str = GEO_URL, + geoserver: str = GEOSERVER_URL, ) -> dict: - """Return features from the hydro routing data set using bounding box coordinates. + """ + Return features from the hydro routing data set using bounding box coordinates. - For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based - on projected coordinate system (Easting, Northing) boundaries. + For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. 
+ If not geographic, subsetting based on projected coordinate system (Easting, Northing) boundaries. Parameters ---------- @@ -663,15 +646,16 @@ def get_hydro_routing_location_wfs( Query the version of dataset with lakes under 1km in width removed ("1km") or return all lakes ("all"). level : int Level of granularity requested for the lakes vector (range(7,13)). Default: 12. - geoserver: str + geoserver : str The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. Returns ------- dict A GeoJSON-derived dictionary of vector features (FeatureCollection). - """ + geoserver = _fix_server_url(geoserver) + layer = f"public:routing_{lakes}Lakes_{str(level).zfill(2)}" if not wfs_Point and not Intersects: diff --git a/src/raven/utilities/io.py b/src/raven/utilities/io.py index 95d95ed5..12ada198 100644 --- a/src/raven/utilities/io.py +++ b/src/raven/utilities/io.py @@ -45,7 +45,8 @@ def safe_extract( def address_append(address: Union[str, Path]) -> str: - """Format a URL/URI to be more easily read with libraries such as "rasterstats". + """ + Format a URL/URI to be more easily read with libraries such as "rasterstats". Parameters ---------- diff --git a/src/raven/utilities/testdata.py b/src/raven/utilities/testdata.py index 8d35d131..79ac458e 100644 --- a/src/raven/utilities/testdata.py +++ b/src/raven/utilities/testdata.py @@ -42,7 +42,8 @@ def get_local_testdata( branch: str = "master", _local_cache: Union[str, os.PathLike] = _default_cache_dir, ) -> Union[Path, list[Path]]: - """Copy specific testdata from a default cache to a temporary folder. + """ + Copy specific testdata from a default cache to a temporary folder. Return files matching `pattern` in the default cache dir and move to a local temp folder. @@ -177,8 +178,8 @@ def get_file( branch: str = "master", cache_dir: Path = _default_cache_dir, ) -> Union[Path, list[Path]]: - """ - Return a file from an online GitHub-like repository. 
+ """Return a file from an online GitHub-like repository. + If a local copy is found then always use that to avoid network traffic. Parameters @@ -224,8 +225,8 @@ def query_folder( github_url: str = "https://github.com/Ouranosinc/raven-testdata", branch: str = "master", ) -> list[str]: - """ - Lists the files available for retrieval from a remote git repository with get_file. + """Lists the files available for retrieval from a remote git repository with get_file. + If provided a folder name, will perform a globbing-like filtering operation for parent folders. Parameters @@ -279,7 +280,8 @@ def open_dataset( cache_dir: Path = _default_cache_dir, **kwds, ) -> Dataset: - r"""Open a dataset from the online GitHub-like repository. + r""" + Open a dataset from the online GitHub-like repository. If a local copy is found then always use that to avoid network traffic. diff --git a/src/raven/utils.py b/src/raven/utils.py index b0bb0120..daa07c9d 100644 --- a/src/raven/utils.py +++ b/src/raven/utils.py @@ -72,7 +72,8 @@ def gather_dem_tile( geographic: bool = True, raster: str = EARTH_ENV_DEM, ) -> Path: - """Return a raster coverage for a given vector geometry. + """ + Return a raster coverage for a given vector geometry. Parameters ---------- @@ -88,7 +89,7 @@ def gather_dem_tile( Returns ------- Path - Path to raster file. + The path to raster file. """ bbox = get_bbox(vector_file) raster_layer = raster @@ -102,12 +103,13 @@ def gather_dem_tile( def parse_lonlat(lonlat: Union[str, tuple[str, str]]) -> tuple[float, float]: - """Return longitude and latitude from a string. + """ + Return longitude and latitude from a string. Parameters ---------- lonlat : str or Tuple[str, str] - A tuple or a str of lon and lat coordinates. + A tuple or a str of lon and lat coordinates. Returns ------- @@ -149,7 +151,7 @@ def zonalstats_raster_file( The data encoding of the raster used to write the grid (e.g. 'int16'). crs : str The coordinate reference system. 
- zip_archive: bool + zip_archive : bool Return the files as a zipped archive (default: False). Returns