From 3ecf8532ce7e8738f5b57b37a5310ac7a32bf682 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 14 Mar 2022 08:15:44 -0400 Subject: [PATCH] Wrap triangulate (#731) Wrapping the triangulate function which does "Delaunay triangulation or Voronoi partitioning and gridding of Cartesian data". Original GMT documentation can be found at https://docs.generic-mapping-tools.org/6.3/triangulate.html. Aliased outgrid (G), spacing (I), projection (J), region (R), verbose (V), registration (r). * Refactor triangulate to use virtualfile_from_data * Refactor triangulate implementation to use pygmt.io.load_dataarray * Alias binary(b), nodata(d), find(e), coltypes(f), header(h), incols(i) * Rename the parameter 'table' to 'data' As per https://github.com/GenericMappingTools/pygmt/issues/1479. * Refactor test_triangulate to use Table_5_11_mean.xyz instead of tut_ship * Refactor test_triangulate_with_outgrid to use xr.testing.assert_allclose * Refactor test_triangulate_input_xyz to use pd.testing.assert_frame_equal * Implement regular_grid and delaunay_triples staticmethod for triangulate * Let list inputs to spacing (I) and incols (i) work Use I="sequence" and i="sequence_comma". 
* Ensure triangulate.delaunay_triples output_type is valid Must be either one of numpy, pandas or file * Autocorrect output_type to 'file' if outfile parameter is set * Allow only str or None inputs to outgrid parameter Xref https://github.com/GenericMappingTools/pygmt/issues/1807 * Use gmt get GMT_TRIANGULATE to check whether Watson or Shewchuk is used * State that Shewchuk is the default triangulation algorithm As per https://github.com/GenericMappingTools/gmt/pull/6438 * Actually document the output_type parameter for delaunay_triples Co-authored-by: Will Schlitzer Co-authored-by: Meghan Jones Co-authored-by: Dongdong Tian --- doc/api/index.rst | 3 + pygmt/__init__.py | 1 + pygmt/src/__init__.py | 1 + pygmt/src/triangulate.py | 397 ++++++++++++++++++++++++++++++++ pygmt/tests/test_triangulate.py | 169 ++++++++++++++ 5 files changed, 571 insertions(+) create mode 100644 pygmt/src/triangulate.py create mode 100644 pygmt/tests/test_triangulate.py diff --git a/doc/api/index.rst b/doc/api/index.rst index b488a2a4874..5287a36836e 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -124,6 +124,9 @@ Operations on tabular data sphdistance sphinterpolate surface + triangulate + triangulate.regular_grid + triangulate.delaunay_triples xyz2grd Operations on raster data diff --git a/pygmt/__init__.py b/pygmt/__init__.py index db33925edf2..23e1eab5de7 100644 --- a/pygmt/__init__.py +++ b/pygmt/__init__.py @@ -56,6 +56,7 @@ sphdistance, sphinterpolate, surface, + triangulate, which, x2sys_cross, x2sys_init, diff --git a/pygmt/src/__init__.py b/pygmt/src/__init__.py index 6c1a29b91de..a31cf8f88df 100644 --- a/pygmt/src/__init__.py +++ b/pygmt/src/__init__.py @@ -47,6 +47,7 @@ from pygmt.src.subplot import set_panel, subplot from pygmt.src.surface import surface from pygmt.src.text import text_ as text # "text" is an argument within "text_" +from pygmt.src.triangulate import triangulate from pygmt.src.velo import velo from pygmt.src.which import which from pygmt.src.wiggle 
import wiggle diff --git a/pygmt/src/triangulate.py b/pygmt/src/triangulate.py new file mode 100644 index 00000000000..e50eb086269 --- /dev/null +++ b/pygmt/src/triangulate.py @@ -0,0 +1,397 @@ +""" +triangulate - Delaunay triangulation or Voronoi partitioning and gridding of +Cartesian data. +""" +import warnings + +import pandas as pd +from pygmt.clib import Session +from pygmt.exceptions import GMTInvalidInput +from pygmt.helpers import ( + GMTTempFile, + build_arg_string, + fmt_docstring, + kwargs_to_strings, + use_alias, +) +from pygmt.io import load_dataarray + + +class triangulate: # pylint: disable=invalid-name + """ + Delaunay triangulation or Voronoi partitioning and gridding of Cartesian + data. + + Triangulate reads in x,y[,z] data and performs Delaunay triangulation, + i.e., it finds how the points should be connected to give the most + equilateral triangulation possible. If a map projection (give ``region`` + and ``projection``) is chosen then it is applied before the triangulation + is calculated. By default, the output is triplets of point id numbers that + make up each triangle. The id numbers refer to the points position (line + number, starting at 0 for the first line) in the input file. If ``outgrid`` + and ``spacing`` are set a grid will be calculated based on the surface + defined by the planar triangles. The actual algorithm used in the + triangulations is either that of Watson [1982] or Shewchuk [1996] [Default + is Shewchuk if installed; type ``gmt get GMT_TRIANGULATE`` on the command + line to see which method is selected]. Furthermore, if the Shewchuk + algorithm is installed then you can also perform the calculation of Voronoi + polygons and optionally grid your data via the natural nearest neighbor + algorithm. 
+ + Note + ---- + For geographic data with global or very large extent you should consider + :gmt-docs:`sphtriangulate <sphtriangulate.html>` instead since + ``triangulate`` is a Cartesian or small-geographic area operator and is + unaware of periodic or polar boundary conditions. + """ + + @staticmethod + @fmt_docstring + @use_alias( + G="outgrid", + I="spacing", + J="projection", + R="region", + V="verbose", + b="binary", + d="nodata", + e="find", + f="coltypes", + h="header", + i="incols", + r="registration", + s="skiprows", + w="wrap", + ) + @kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma") + def _triangulate( + data=None, x=None, y=None, z=None, *, output_type, outfile=None, **kwargs + ): + """ + Delaunay triangulation or Voronoi partitioning and gridding of + Cartesian data. + + Must provide ``outfile`` or ``outgrid``. + + Full option list at :gmt-docs:`triangulate.html` + + {aliases} + + Parameters + ---------- + x/y/z : np.ndarray + Arrays of x and y coordinates and values z of the data points. + data : str or {table-like} + Pass in (x, y, z) or (longitude, latitude, elevation) values by + providing a file name to an ASCII data table, a 2D + {table-classes}. + {J} + {R} + {I} + outgrid : bool or str + The name of the output netCDF file with extension .nc to store the + grid in. The interpolation is performed in the original + coordinates, so if your triangles are close to the poles you are + better off projecting all data to a local coordinate system before + using ``triangulate`` (this is true of all gridding routines) or + instead select :gmt-docs:`sphtriangulate <sphtriangulate.html>`. + outfile : str or bool or None + The name of the output ASCII file to store the results of the + triangulation in. + output_type: str + Determines the output type. Use "file", "xarray", "pandas", or + "numpy". + {V} + {b} + {d} + {e} + {f} + {h} + {i} + {r} + Only valid with ``outgrid``.
+ {s} + {w} + + Returns + ------- + ret: numpy.ndarray or pandas.DataFrame or xarray.DataArray or None + Return type depends on the ``output_type`` parameter: + + - numpy.ndarray if ``output_type`` is "numpy" + - pandas.DataFrame if ``output_type`` is "pandas" + - xarray.DataArray if ``output_type`` is "xarray" + - None if ``output_type`` is "file" (output is stored in + ``outgrid`` or ``outfile``) + """ + with Session() as lib: + # Choose how data will be passed into the module + table_context = lib.virtualfile_from_data( + check_kind="vector", data=data, x=x, y=y, z=z, required_z=False + ) + with table_context as infile: + # table output if outgrid is unset, else output to outgrid + if (outgrid := kwargs.get("G")) is None: + kwargs.update({">": outfile}) + arg_str = " ".join([infile, build_arg_string(kwargs)]) + lib.call_module(module="triangulate", args=arg_str) + + if output_type == "file": + return None + if output_type == "xarray": + return load_dataarray(outgrid) + + result = pd.read_csv(outfile, sep="\t", header=None) + if output_type == "numpy": + return result.to_numpy() + return result + + @staticmethod + @fmt_docstring + def regular_grid( # pylint: disable=too-many-arguments,too-many-locals + data=None, + x=None, + y=None, + z=None, + outgrid=None, + spacing=None, + projection=None, + region=None, + verbose=None, + binary=None, + nodata=None, + find=None, + coltypes=None, + header=None, + incols=None, + registration=None, + skiprows=None, + wrap=None, + **kwargs, + ): + """ + Delaunay triangle based gridding of Cartesian data. + + Reads in x,y[,z] data and performs Delaunay triangulation, i.e., it + finds how the points should be connected to give the most equilateral + triangulation possible. If a map projection (give ``region`` and + ``projection``) is chosen then it is applied before the triangulation + is calculated. By setting ``outgrid`` and ``spacing``, a grid will be + calculated based on the surface defined by the planar triangles.
The + actual algorithm used in the triangulations is either that of Watson + [1982] or Shewchuk [1996] [Default is Shewchuk if installed; type + ``gmt get GMT_TRIANGULATE`` on the command line to see which method is + selected]. This choice is made during the GMT installation. + Furthermore, if the Shewchuk algorithm is installed then you can also + perform the calculation of Voronoi polygons and optionally grid your + data via the natural nearest neighbor algorithm. + + Must provide either ``data`` or ``x``, ``y``, and ``z``. + + Must provide ``region`` and ``spacing``. + + Full option list at :gmt-docs:`triangulate.html` + + Parameters + ---------- + x/y/z : np.ndarray + Arrays of x and y coordinates and values z of the data points. + data : str or {table-like} + Pass in (x, y[, z]) or (longitude, latitude[, elevation]) values by + providing a file name to an ASCII data table, a 2D + {table-classes}. + {J} + {R} + {I} + outgrid : str or None + The name of the output netCDF file with extension .nc to store the + grid in. The interpolation is performed in the original + coordinates, so if your triangles are close to the poles you are + better off projecting all data to a local coordinate system before + using ``triangulate`` (this is true of all gridding routines) or + instead select :gmt-docs:`sphtriangulate <sphtriangulate.html>`. + {V} + {b} + {d} + {e} + {f} + {h} + {i} + {r} + {s} + {w} + + Returns + ------- + ret: xarray.DataArray or None + Return type depends on whether the ``outgrid`` parameter is set: + + - xarray.DataArray if ``outgrid`` is None (default) + - None if ``outgrid`` is a str (grid output is stored in + ``outgrid``) + + Note + ---- + For geographic data with global or very large extent you should + consider :gmt-docs:`sphtriangulate <sphtriangulate.html>` instead since + ``triangulate`` is a Cartesian or small-geographic area operator and is + unaware of periodic or polar boundary conditions.
+ """ + # Return an xarray.DataArray if ``outgrid`` is not set + with GMTTempFile(suffix=".nc") as tmpfile: + if isinstance(outgrid, str): + output_type = "file" + elif outgrid is None: + output_type = "xarray" + outgrid = tmpfile.name + else: + raise GMTInvalidInput( + "'outgrid' should be a proper file name or `None`" + ) + + return triangulate._triangulate( + data=data, + x=x, + y=y, + z=z, + output_type=output_type, + outgrid=outgrid, + spacing=spacing, + projection=projection, + region=region, + verbose=verbose, + binary=binary, + nodata=nodata, + find=find, + coltypes=coltypes, + header=header, + incols=incols, + registration=registration, + skiprows=skiprows, + wrap=wrap, + **kwargs, + ) + + @staticmethod + @fmt_docstring + def delaunay_triples( # pylint: disable=too-many-arguments,too-many-locals + data=None, + x=None, + y=None, + z=None, + output_type="pandas", + outfile=None, + projection=None, + verbose=None, + binary=None, + nodata=None, + find=None, + coltypes=None, + header=None, + incols=None, + skiprows=None, + wrap=None, + **kwargs, + ): + """ + Delaunay triangulation of Cartesian data. + + Reads in x,y[,z] data and performs Delaunay triangulation, i.e., it + finds how the points should be connected to give the most equilateral + triangulation possible. If a map projection (give ``region`` and + ``projection``) is chosen then it is applied before the triangulation + is calculated. The actual algorithm used in the triangulations is + either that of Watson [1982] or Shewchuk [1996] [Default is Shewchuk if + installed; type ``gmt get GMT_TRIANGULATE`` on the command line to see + which method is selected]. + + Must provide either ``data`` or ``x`` and ``y`` (``z`` is optional). + + Full option list at :gmt-docs:`triangulate.html` + + Parameters + ---------- + x/y/z : np.ndarray + Arrays of x and y coordinates and values z of the data points.
+ data : str or {table-like} + Pass in (x, y, z) or (longitude, latitude, elevation) values by + providing a file name to an ASCII data table, a 2D + {table-classes}. + {J} + {R} + outfile : str or bool or None + The name of the output ASCII file to store the results of the + triangulation in. + output_type : str + Determine the format the triangulation results will be returned in + [Default is ``pandas``]: + + - ``numpy`` - :class:`numpy.ndarray` + - ``pandas`` - :class:`pandas.DataFrame` + - ``file`` - ASCII file (requires ``outfile``) + {V} + {b} + {d} + {e} + {f} + {h} + {i} + {s} + {w} + + Returns + ------- + ret : pandas.DataFrame or numpy.ndarray or None + Return type depends on ``outfile`` and ``output_type``: + + - None if ``outfile`` is set (output will be stored in file set by + ``outfile``) + - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if + ``outfile`` is not set (depends on ``output_type``) + + Note + ---- + For geographic data with global or very large extent you should + consider :gmt-docs:`sphtriangulate <sphtriangulate.html>` instead since + ``triangulate`` is a Cartesian or small-geographic area operator and is + unaware of periodic or polar boundary conditions. + """ + # Validate ``output_type`` before doing any work + if output_type not in ["numpy", "pandas", "file"]: + raise GMTInvalidInput( + "Must specify 'output_type' either as 'numpy', 'pandas' or 'file'." + ) + + if isinstance(outfile, str) and output_type != "file": + msg = ( + f"Changing 'output_type' from '{output_type}' to 'file' " + "since 'outfile' parameter is set. Please use output_type='file' " + "to silence this warning."
+ ) + warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) + output_type = "file" + + # Return a pandas.DataFrame if ``outfile`` is not set + with GMTTempFile(suffix=".txt") as tmpfile: + if output_type != "file": + outfile = tmpfile.name + return triangulate._triangulate( + data=data, + x=x, + y=y, + z=z, + output_type=output_type, + outfile=outfile, + projection=projection, + verbose=verbose, + binary=binary, + nodata=nodata, + find=find, + coltypes=coltypes, + header=header, + incols=incols, + skiprows=skiprows, + wrap=wrap, + **kwargs, + ) diff --git a/pygmt/tests/test_triangulate.py b/pygmt/tests/test_triangulate.py new file mode 100644 index 00000000000..ec835f9316e --- /dev/null +++ b/pygmt/tests/test_triangulate.py @@ -0,0 +1,169 @@ +""" +Tests for triangulate. +""" +import os + +import numpy as np +import pandas as pd +import pytest +import xarray as xr +from pygmt import triangulate, which +from pygmt.exceptions import GMTInvalidInput +from pygmt.helpers import GMTTempFile, data_kind + + +@pytest.fixture(scope="module", name="dataframe") +def fixture_dataframe(): + """ + Load the table data from the sample bathymetry dataset. + """ + fname = which("@Table_5_11_mean.xyz", download="c") + return pd.read_csv( + fname, sep=r"\s+", header=None, names=["x", "y", "z"], skiprows=1 + )[:10] + + +@pytest.fixture(scope="module", name="expected_dataframe") +def fixture_dataframe_result(): + """ + Load the expected triangulate dataframe result. + """ + return pd.DataFrame( + data=[ + [7, 8, 2], + [8, 7, 9], + [7, 1, 0], + [1, 7, 2], + [1, 2, 4], + [8, 3, 2], + [9, 5, 3], + [5, 9, 6], + [5, 4, 3], + [4, 5, 6], + [4, 6, 1], + [3, 4, 2], + [9, 3, 8], + ] + ) + + +@pytest.fixture(scope="module", name="expected_grid") +def fixture_grid_result(): + """ + Load the expected triangulate grid result. 
+ """ + return xr.DataArray( + data=[[779.6264, 752.1539, 749.38776], [771.2882, 726.9792, 722.1368]], + coords=dict(y=[5, 6], x=[2, 3, 4]), + dims=["y", "x"], + ) + + +@pytest.mark.parametrize("array_func", [np.array, xr.Dataset]) +def test_delaunay_triples_input_table_matrix(array_func, dataframe, expected_dataframe): + """ + Run triangulate.delaunay_triples by passing in a numpy.array or + xarray.Dataset. + """ + table = array_func(dataframe) + output = triangulate.delaunay_triples(data=table) + pd.testing.assert_frame_equal(left=output, right=expected_dataframe) + + +def test_delaunay_triples_input_xyz(dataframe, expected_dataframe): + """ + Run triangulate.delaunay_triples by passing in x, y, z numpy.ndarrays + individually. + """ + output = triangulate.delaunay_triples(x=dataframe.x, y=dataframe.y, z=dataframe.z) + pd.testing.assert_frame_equal(left=output, right=expected_dataframe) + + +def test_delaunay_triples_input_xy_no_z(dataframe, expected_dataframe): + """ + Run triangulate.delaunay_triples by passing in x and y, but no z. + """ + output = triangulate.delaunay_triples(x=dataframe.x, y=dataframe.y) + pd.testing.assert_frame_equal(left=output, right=expected_dataframe) + + +def test_delaunay_triples_wrong_kind_of_input(dataframe): + """ + Run triangulate.delaunay_triples using grid input that is not + file/matrix/vectors. + """ + data = dataframe.z.to_xarray() # convert pandas.Series to xarray.DataArray + assert data_kind(data) == "grid" + with pytest.raises(GMTInvalidInput): + triangulate.delaunay_triples(data=data) + + +def test_delaunay_triples_ndarray_output(dataframe, expected_dataframe): + """ + Test triangulate.delaunay_triples with "numpy" output type. 
+ """ + output = triangulate.delaunay_triples(data=dataframe, output_type="numpy") + assert isinstance(output, np.ndarray) + np.testing.assert_allclose(actual=output, desired=expected_dataframe.to_numpy()) + + +def test_delaunay_triples_outfile(dataframe, expected_dataframe): + """ + Test triangulate.delaunay_triples with ``outfile``. + """ + with GMTTempFile(suffix=".txt") as tmpfile: + with pytest.warns(RuntimeWarning) as record: + result = triangulate.delaunay_triples(data=dataframe, outfile=tmpfile.name) + assert len(record) == 1 # check that only one warning was raised + assert result is None # return value is None + assert os.path.exists(path=tmpfile.name) + temp_df = pd.read_csv(filepath_or_buffer=tmpfile.name, sep="\t", header=None) + pd.testing.assert_frame_equal(left=temp_df, right=expected_dataframe) + + +def test_delaunay_triples_invalid_format(dataframe): + """ + Test that triangulate.delaunay_triples fails with incorrect format. + """ + with pytest.raises(GMTInvalidInput): + triangulate.delaunay_triples(data=dataframe, output_type=1) + + +def test_regular_grid_no_outgrid(dataframe, expected_grid): + """ + Run triangulate.regular_grid with no set outgrid and see it load into an + xarray.DataArray. + """ + data = dataframe.to_numpy() + output = triangulate.regular_grid(data=data, spacing=1, region=[2, 4, 5, 6]) + assert isinstance(output, xr.DataArray) + assert output.gmt.registration == 0 # Gridline registration + assert output.gmt.gtype == 0 # Cartesian type + xr.testing.assert_allclose(a=output, b=expected_grid) + + +def test_regular_grid_with_outgrid_param(dataframe, expected_grid): + """ + Run triangulate.regular_grid with the -Goutputfile.nc parameter. 
+ """ + data = dataframe.to_numpy() + with GMTTempFile(suffix=".nc") as tmpfile: + output = triangulate.regular_grid( + data=data, spacing=1, region=[2, 4, 5, 6], outgrid=tmpfile.name + ) + assert output is None # check that output is None since outgrid is set + assert os.path.exists(path=tmpfile.name) # check that outgrid exists + with xr.open_dataarray(tmpfile.name) as grid: + assert isinstance(grid, xr.DataArray) + assert grid.gmt.registration == 0 # Gridline registration + assert grid.gmt.gtype == 0 # Cartesian type + xr.testing.assert_allclose(a=grid, b=expected_grid) + + +def test_regular_grid_invalid_format(dataframe): + """ + Test that triangulate.regular_grid fails with outgrid that is not None or a + proper file name. + """ + with pytest.raises(GMTInvalidInput): + triangulate.regular_grid(data=dataframe, outgrid=True)