From f5c586a483f92bd071472bd7a8acc651ddf1fb07 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Tue, 15 Sep 2020 23:17:55 +1200 Subject: [PATCH] :chart_with_upwards_trend: 2D along track plots with reversible legend The icy water may be draining, but we're still keeping things DRY! Chucking all of the alongtrack code (originally from atlxi_dhdt.ipynb, sitting in atlxi_lake.ipynb secretly for a while, now refactored to near perfection) into a proper visualization function that is tested! The abstraction makes it easier to generate alongtrack plots when looping through different reference ground tracks (0001 to 1387) and pair tracks (pt1, pt2, pt3). Included a boolean oldtonew flag to allow for flipping the legend, useful for e.g. when lake is filling up over time and we want Cycle 7 to be above Cycle 6. To be honest, test_vizplots.py could almost be treated as an integration/behavioural driven development test, but haven't got time to do that properly. Also took the opportunity to update some documentation in deepicedrain/README.md, including the icesat2dhdt catalog entry and wide_to_long function that was missed out before. --- atlxi_dhdt.ipynb | 37 +--------- atlxi_dhdt.py | 37 +--------- atlxi_lake.ipynb | 66 ++++++++++++++--- atlxi_lake.py | 49 +++++++++++-- deepicedrain/README.md | 3 + deepicedrain/__init__.py | 5 +- deepicedrain/extraload.py | 2 +- deepicedrain/tests/test_vizplots.py | 59 +++++++++++++++ deepicedrain/vizplots.py | 108 +++++++++++++++++++++++++++- 9 files changed, 280 insertions(+), 86 deletions(-) create mode 100644 deepicedrain/tests/test_vizplots.py diff --git a/atlxi_dhdt.ipynb b/atlxi_dhdt.ipynb index 90c54ff..2b05397 100644 --- a/atlxi_dhdt.ipynb +++ b/atlxi_dhdt.ipynb @@ -1009,40 +1009,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Plot 2D along track view of\n", - "# Ice Surface Height Changes over Time\n", - "fig = pygmt.Figure()\n", - "# Setup map frame, title, axis annotations, etc\n", - "fig.basemap(\n", - " projection=\"X30c/10c\",\n", - " region=[df.x_atc.min(), df.x_atc.max(), df.h_corr.min(), df.h_corr.max()],\n", - " frame=[\n", - " rf'WSne+t\"ICESat-2 Change in Ice Surface Height over Time at {region.name}\"',\n", - " 'xaf+l\"Along track x (m)\"',\n", - " 'yaf+l\"Height (m)\"',\n", - " ],\n", - ")\n", - "fig.text(\n", - " text=f\"Reference Ground Track {rgt:04d}\", position=\"TC\", offset=\"jTC0c/0.2c\", V=\"q\"\n", - ")\n", - "# Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=7\n", - "cycle_colors = {3: \"#ff7f00\", 4: \"#984ea3\", 5: \"#4daf4a\", 6: \"#377eb8\", 7: \"#e41a1c\"}\n", - "for cycle, color in cycle_colors.items():\n", - " df_ = df.query(expr=\"cycle_number == @cycle\").copy()\n", - " if len(df_) > 0:\n", - " # Get x, y, time\n", - " data = np.column_stack(tup=(df_.x_atc, df_.h_corr))\n", - " time_nsec = df_.utc_time.mean()\n", - " time_sec = np.datetime_as_string(arr=time_nsec.to_datetime64(), unit=\"s\")\n", - " label = f'\"Cycle {cycle} at {time_sec}\"'\n", - "\n", - " # Plot data points\n", - " fig.plot(data=data, style=\"c0.05c\", color=color, label=label)\n", - " # Plot line connecting points\n", - " # fig.plot(data=data, pen=f\"faint,{color},-\", label=f'\"+g-1l+s0.15c\"')\n", - "\n", - "fig.legend(S=3, position=\"JMR+JMR+o0.2c\", box=\"+gwhite+p1p\")\n", - "fig.savefig(f\"figures/alongtrack_atl11_dh_{placename}_{rgt}.png\")\n", + "# Plot 2D along track view of Ice Surface Height Changes over Time\n", + "fig = deepicedrain.plot_alongtrack(df=df, rgtpair=f\"{rgt:04d}\", regionname=region.name)\n", + 
"fig.savefig(fname=f\"figures/alongtrack_{placename}_{rgt}.png\")\n", "fig.show()" ] }, diff --git a/atlxi_dhdt.py b/atlxi_dhdt.py index 5399181..756f014 100644 --- a/atlxi_dhdt.py +++ b/atlxi_dhdt.py @@ -454,40 +454,9 @@ df = df.query(expr="abs(dhdt_slope) > 0.2 & h_corr < 300") # %% -# Plot 2D along track view of -# Ice Surface Height Changes over Time -fig = pygmt.Figure() -# Setup map frame, title, axis annotations, etc -fig.basemap( - projection="X30c/10c", - region=[df.x_atc.min(), df.x_atc.max(), df.h_corr.min(), df.h_corr.max()], - frame=[ - rf'WSne+t"ICESat-2 Change in Ice Surface Height over Time at {region.name}"', - 'xaf+l"Along track x (m)"', - 'yaf+l"Height (m)"', - ], -) -fig.text( - text=f"Reference Ground Track {rgt:04d}", position="TC", offset="jTC0c/0.2c", V="q" -) -# Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=7 -cycle_colors = {3: "#ff7f00", 4: "#984ea3", 5: "#4daf4a", 6: "#377eb8", 7: "#e41a1c"} -for cycle, color in cycle_colors.items(): - df_ = df.query(expr="cycle_number == @cycle").copy() - if len(df_) > 0: - # Get x, y, time - data = np.column_stack(tup=(df_.x_atc, df_.h_corr)) - time_nsec = df_.utc_time.mean() - time_sec = np.datetime_as_string(arr=time_nsec.to_datetime64(), unit="s") - label = f'"Cycle {cycle} at {time_sec}"' - - # Plot data points - fig.plot(data=data, style="c0.05c", color=color, label=label) - # Plot line connecting points - # fig.plot(data=data, pen=f"faint,{color},-", label=f'"+g-1l+s0.15c"') - -fig.legend(S=3, position="JMR+JMR+o0.2c", box="+gwhite+p1p") -fig.savefig(f"figures/alongtrack_atl11_dh_{placename}_{rgt}.png") +# Plot 2D along track view of Ice Surface Height Changes over Time +fig = deepicedrain.plot_alongtrack(df=df, rgtpair=f"{rgt:04d}", regionname=region.name) +fig.savefig(fname=f"figures/alongtrack_{placename}_{rgt}.png") fig.show() # %% diff --git a/atlxi_lake.ipynb b/atlxi_lake.ipynb index f01c027..8ba3d5d 100644 --- a/atlxi_lake.ipynb +++ b/atlxi_lake.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "source": [ "# **ICESat-2 Active Subglacial Lakes in Antarctica**\n", "\n", @@ -25,26 +27,30 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [], "source": [ "import os\n", "\n", - "import numpy as np\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n", "\n", "import cudf\n", "import cuml\n", "import dask\n", "import dask.array\n", - "import deepicedrain\n", "import geopandas as gpd\n", - "import hvplot.cudf\n", + "import numpy as np\n", + "import pandas as pd\n", "import panel as pn\n", "import pygmt\n", "import scipy.spatial\n", "import shapely.geometry\n", "import tqdm\n", - "import zarr" + "import zarr\n", + "\n", + "import deepicedrain" ] }, { @@ -359,7 +365,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [], "source": [ "# Subset data to lake of interest\n", @@ -367,6 +375,48 @@ "df_lake: cudf.DataFrame = region.subset(data=df_dhdt)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Select a few Reference Ground tracks to look at\n", + "rgts: list = [int(rgt) for rgt in lake.refgtracks.split(\"|\")]\n", + "print(f\"Looking at Reference Ground Tracks: {rgts}\")\n", + "os.makedirs(name=f\"figures/{placename}\", exist_ok=True)\n", + "\n", + "track_dict: dict = {}\n", + "rgt_groups = 
df_lake.groupby(by=\"referencegroundtrack\")\n", + "for rgt, df_rgt_wide in tqdm.tqdm(rgt_groups, total=len(rgt_groups.groups.keys())):\n", + " df_rgt: pd.DataFrame = deepicedrain.wide_to_long(\n", + " df=df_rgt_wide.to_pandas(),\n", + " stubnames=[\"h_corr\", \"utc_time\"],\n", + " j=\"cycle_number\",\n", + " )\n", + "\n", + " # Split one referencegroundtrack into 3 laser pair tracks pt1, pt2, pt3\n", + " df_rgt[\"pairtrack\"]: pd.Series = pd.cut(\n", + " x=df_rgt.y_atc, bins=[-np.inf, -100, 100, np.inf], labels=(\"pt1\", \"pt2\", \"pt3\")\n", + " )\n", + " pt_groups = df_rgt.groupby(by=\"pairtrack\")\n", + " for pairtrack, df_ in pt_groups:\n", + " if len(df_) > 0:\n", + " rgtpair = f\"{rgt:04d}_{pairtrack}\"\n", + " track_dict[rgtpair] = df_\n", + "\n", + " # Transect plot along a reference ground track\n", + " fig = deepicedrain.plot_alongtrack(\n", + " df=df_,\n", + " rgtpair=rgtpair,\n", + " regionname=region.name,\n", + " oldtonew=draining,\n", + " )\n", + " fig.savefig(\n", + " fname=f\"figures/{placename}/alongtrack_{placename}_{rgtpair}.png\"\n", + " )" + ] + }, { "cell_type": "markdown", "metadata": { @@ -477,7 +527,7 @@ "# Calculate crossover error\n", "df[\"h_X\"]: pd.Series = df.h_2 - df.h_1 # crossover error (i.e. height difference)\n", "df[\"t_D\"]: pd.Series = df.t_2 - df.t_1 # elapsed time in ns (i.e. time difference)\n", - "ns_in_yr: int = (365.25 * 24 * 60 * 60 * 1_000_000_000) # nanoseconds in a year\n", + "ns_in_yr: int = 365.25 * 24 * 60 * 60 * 1_000_000_000 # nanoseconds in a year\n", "df[\"dhdt\"]: pd.Series = df.h_X / (df.t_D.astype(np.int64) / ns_in_yr)" ] }, diff --git a/atlxi_lake.py b/atlxi_lake.py index 68ff520..9106848 100644 --- a/atlxi_lake.py +++ b/atlxi_lake.py @@ -32,18 +32,19 @@ # we will use state of the art GPU algorithms enabled by RAPIDS AI libraries, # or parallelize the processing across our HPC's many CPU cores using Dask. 
+ # %% import os -import numpy as np +os.environ["CUDA_VISIBLE_DEVICES"] = "1" import cudf import cuml import dask import dask.array -import deepicedrain import geopandas as gpd -import hvplot.cudf +import numpy as np +import pandas as pd import panel as pn import pygmt import scipy.spatial @@ -51,6 +52,8 @@ import tqdm import zarr +import deepicedrain + # %% [markdown] # # Data Preparation @@ -242,6 +245,7 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series: f"ATLXI/df_dhdt_{placename.lower()}.parquet" ) + # %% # Antarctic subglacial lake polygons with EPSG:3031 coordinates antarctic_lakes: gpd.GeoDataFrame = gpd.read_file( @@ -268,6 +272,42 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series: df_lake: cudf.DataFrame = region.subset(data=df_dhdt) +# %% +# Select a few Reference Ground tracks to look at +rgts: list = [int(rgt) for rgt in lake.refgtracks.split("|")] +print(f"Looking at Reference Ground Tracks: {rgts}") +os.makedirs(name=f"figures/{placename}", exist_ok=True) + +track_dict: dict = {} +rgt_groups = df_lake.groupby(by="referencegroundtrack") +for rgt, df_rgt_wide in tqdm.tqdm(rgt_groups, total=len(rgt_groups.groups.keys())): + df_rgt: pd.DataFrame = deepicedrain.wide_to_long( + df=df_rgt_wide.to_pandas(), + stubnames=["h_corr", "utc_time"], + j="cycle_number", + ) + + # Split one referencegroundtrack into 3 laser pair tracks pt1, pt2, pt3 + df_rgt["pairtrack"]: pd.Series = pd.cut( + x=df_rgt.y_atc, bins=[-np.inf, -100, 100, np.inf], labels=("pt1", "pt2", "pt3") + ) + pt_groups = df_rgt.groupby(by="pairtrack") + for pairtrack, df_ in pt_groups: + if len(df_) > 0: + rgtpair = f"{rgt:04d}_{pairtrack}" + track_dict[rgtpair] = df_ + + # Transect plot along a reference ground track + fig = deepicedrain.plot_alongtrack( + df=df_, + rgtpair=rgtpair, + regionname=region.name, + oldtonew=draining, + ) + fig.savefig( + fname=f"figures/{placename}/alongtrack_{placename}_{rgtpair}.png" + ) + # %% [markdown] # # Crossover Track Analysis # @@ -348,7 +388,7 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series: # Calculate crossover error df["h_X"]: pd.Series = df.h_2 - df.h_1 # crossover error (i.e. height difference) df["t_D"]: pd.Series = df.t_2 - df.t_1 # elapsed time in ns (i.e. time difference) -ns_in_yr: int = (365.25 * 24 * 60 * 60 * 1_000_000_000) # nanoseconds in a year +ns_in_yr: int = 365.25 * 24 * 60 * 60 * 1_000_000_000 # nanoseconds in a year df["dhdt"]: pd.Series = df.h_X / (df.t_D.astype(np.int64) / ns_in_yr) # %% @@ -406,7 +446,6 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series: # %% # Tidy up dataframe first using pd.wide_to_long # I.e. convert 't_1', 't_2', 'h_1', 'h_2' columns into just 't' and 'h'. 
-df["id"] = df.index df_th: pd.DataFrame = deepicedrain.wide_to_long( df=df[["track1_track2", "x", "y", "t_1", "t_2", "h_1", "h_2"]], stubnames=["t", "h"], diff --git a/deepicedrain/README.md b/deepicedrain/README.md index e36e3ff..7f4a477 100644 --- a/deepicedrain/README.md +++ b/deepicedrain/README.md @@ -5,6 +5,7 @@ Contents: - :artificial_satellite: atlas_catalog.yaml - [intake](https://intake.readthedocs.io) data catalog for accessing ICESat-2 ATLAS datasets - icesat2atlasdownloader - Download Antarctic ICESat-2 ATLAS products from [NSIDC](https://nsidc.org/data/ICESat-2) - icesat2atl06 - Reads in ICESat-2 ATL06 data into an xarray.Dataset + - icesat2dhdt - Preprocessed ICESat-2 height change over time (dhdt) data in a columnar format - test_data - Sample ICESat-2 datasets for testing purposes - :1234: deltamath.py - Mathematical functions for calculating delta changes of some physical unit @@ -20,6 +21,8 @@ Contents: - :card_file_box: extraload.py - Convenience functions for extracting, transforming and loading data - array_to_dataframe - Turns a 1D/2D numpy/dask array into a tidy pandas/dask dataframe table - ndarray_to_parquet - Turns an n-dimensional xarray/zarr array into an a parquet columnar format + - wide_to_long - Turns a pandas dataframe table with many columns into one with many rows - :world_map: vizplots.py - Makes interactive dashboard plots and publication quality figures - IceSat2Explorer - Dashboard for interacting with ICESat-2 point clouds on a 2D map + - plot_alongtrack - Makes a 2D along track figure of height measurements taken at different cycle times diff --git a/deepicedrain/__init__.py b/deepicedrain/__init__.py index cfffbb9..2ac864a 100644 --- a/deepicedrain/__init__.py +++ b/deepicedrain/__init__.py @@ -1,8 +1,9 @@ import importlib.resources import logging -import deepicedrain import intake + +import deepicedrain from deepicedrain.deltamath import calculate_delta, nan_linregress, nanptp from deepicedrain.extraload import array_to_dataframe, ndarray_to_parquet, wide_to_long from deepicedrain.spatiotemporal import ( @@ -11,7 +12,7 @@ lonlat_to_xy, point_in_polygon_gpu, ) -from deepicedrain.vizplots import IceSat2Explorer +from deepicedrain.vizplots import IceSat2Explorer, plot_alongtrack __version__: str = "0.2.1" diff --git a/deepicedrain/extraload.py b/deepicedrain/extraload.py index 4470713..0f72920 100644 --- a/deepicedrain/extraload.py +++ b/deepicedrain/extraload.py @@ -135,7 +135,7 @@ def wide_to_long( index (the 'j' variable), while dropping NaN values too! Documentation for input arguments are the same as pd.wide_to_long. This - convenience functions just uses different default arguments for 'i' and + convenience function just uses different default arguments for 'i' and 'sep'. """ df[i] = df.index diff --git a/deepicedrain/tests/test_vizplots.py b/deepicedrain/tests/test_vizplots.py new file mode 100644 index 0000000..6c54276 --- /dev/null +++ b/deepicedrain/tests/test_vizplots.py @@ -0,0 +1,59 @@ +""" +Tests that various visualizations can be made to appear! +""" +import os +import tempfile + +import pandas as pd +import pytest +import xarray as xr +import pygmt.helpers.testing + +from deepicedrain import ( + catalog, + deltatime_to_utctime, + ndarray_to_parquet, + plot_alongtrack, + wide_to_long, +) + + +@pytest.fixture(scope="module", name="dataframe") +def fixture_dataframe(): + """ + Loads the sample ICESat-2 ATL11 data, and processes it into an suitable + pandas.DataFrame format. 
+    """
+    dataset: xr.Dataset = catalog.test_data.atl11_test_case.to_dask()
+    dataset["utc_time"] = deltatime_to_utctime(dataarray=dataset.delta_time)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        df: pd.DataFrame = ndarray_to_parquet(
+            ndarray=dataset,
+            parquetpath=os.path.join(tmpdir, "temp.parquet"),
+            variables=["longitude", "latitude", "h_corr", "utc_time"],
+            use_deprecated_int96_timestamps=True,
+        )
+        dataframe: pd.DataFrame = wide_to_long(
+            df=df, stubnames=["h_corr", "utc_time"], j="cycle_number"
+        )
+    return dataframe
+
+
+@pygmt.helpers.testing.check_figures_equal()
+def test_plot_alongtrack(dataframe):
+    """
+    Tests that a 2D along track plot figure can be produced. Also makes sure that
+    the default for oldtonew is True (i.e. legend shows Cycle 1 before Cycle 2).
+    """
+    kwargs = dict(
+        df=dataframe,
+        rgtpair="788_pt2",
+        regionname="Greenland",
+        x_var="longitude",
+        spacing="0.1/5",
+    )
+    fig_ref = plot_alongtrack(**kwargs, oldtonew=True)
+    fig_test = plot_alongtrack(**kwargs)
+
+    return fig_ref, fig_test
diff --git a/deepicedrain/vizplots.py b/deepicedrain/vizplots.py
index f5ada8a..944da4c 100644
--- a/deepicedrain/vizplots.py
+++ b/deepicedrain/vizplots.py
@@ -6,12 +6,13 @@
 import os
 import warnings
 
-import numpy as np
-
 import holoviews as hv
 import intake
+import numpy as np
+import pandas as pd
 import panel as pn
 import param
+import pygmt
 
 warnings.filterwarnings(
     action="ignore",
@@ -132,3 +133,106 @@ def widgets(self):
             pn.Column(_widgets[2], _widgets[3], align="center"),
             pn.Column(_widgets[4], _widgets[5], align="center"),
         )
+
+
+def plot_alongtrack(
+    df: pd.DataFrame,
+    rgtpair: str,
+    regionname: str,
+    x_var: str = "x_atc",
+    y_var: str = "h_corr",
+    time_var: str = "utc_time",
+    cycle_var: str = "cycle_number",
+    spacing: str = "1000/5",
+    oldtonew: bool = True,
+) -> pygmt.Figure:
+    """
+    Plot 2D along track view of Ice Surface Height Changes over Time.
+    Uses PyGMT to produce the figure.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        A table containing the ICESat-2 track data from multiple cycles. It
+        should ideally have columns called 'x_atc', 'h_corr', 'utc_time' and
+        'cycle_number'.
+    x_var : str
+        The x-dimension column name to use from the table data, plotted
+        on the horizontal x-axis. Default is 'x_atc'.
+    y_var : str
+        The y-dimension column name to use from the table data, plotted
+        on the vertical y-axis. Default is 'h_corr'.
+    time_var : str
+        The time-dimension column name to use from the table data, used to
+        calculate the mean datetime for each track in every cycle. Default is
+        'utc_time'.
+    cycle_var : str
+        The column name from the table which is used to determine which time
+        cycle each row/observation falls into. Default is 'cycle_number'.
+    spacing : str
+        Provide as 'dx/dy' increments, this is passed directly to `pygmt.info`
+        and used to round up and down the x and y axis limits for a nicer plot
+        frame. Default is '1000/5'.
+    oldtonew : bool
+        Determine the plot order (True: Cycle 1 -> Cycle n; False: Cycle n ->
+        Cycle 1), useful when you want the legend to go one way or the other.
+        For example, the default `oldtonew=True` is recommended when plotting
+        decreasing elevation over time (i.e. lake draining). Set to False
+        instead to reverse the order, recommended when plotting increasing
+        elevation over time (i.e. lake filling).
+ + Returns + ------- + fig : pygmt.Figure + A pygmt Figure instance containing the along track plot which can be + viewed using fig.show() or saved to a file using fig.savefig() + """ + fig = pygmt.Figure() + # Setup map frame, title, axis annotations, etc + fig.basemap( + projection="X30c/10c", + region=pygmt.info(table=df[[x_var, y_var]], spacing=spacing), + frame=[ + rf'WSne+t"ICESat-2 Change in Ice Surface Height over Time at {regionname}"', + 'xaf+l"Along track x (m)"', + 'yaf+l"Height (m)"', + ], + ) + fig.text( + text=f"Reference Ground Track {rgtpair}", + position="TC", + offset="jTC0c/0.2c", + V="q", + ) + # Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=9 + cycle_colors: dict = { + 1: "#999999", + 2: "#f781bf", + 3: "#a65628", + 4: "#ffff33", + 5: "#ff7f00", + 6: "#984ea3", + 7: "#4daf4a", + 8: "#377eb8", + 9: "#e41a1c", + } + # Choose only cycles that need to be plotted, reverse order if requested + cycles: list = sorted(df[cycle_var].unique(), reverse=not oldtonew) + cycle_colors: dict = {cycle: cycle_colors[cycle] for cycle in cycles} + + # For each cycle, plot the height values (y_var) along the track (x_var) + for cycle, color in cycle_colors.items(): + df_ = df.query(expr=f"{cycle_var} == @cycle").copy() + # Get x, y, time + data = np.column_stack(tup=(df_[x_var], df_[y_var])) + time_nsec = df_[time_var].mean() + time_sec = np.datetime_as_string(arr=time_nsec.to_datetime64(), unit="s") + label = f'"Cycle {cycle} at {time_sec}"' + + # Plot data points + fig.plot(data=data, style="c0.05c", color=color, label=label) + # Plot line connecting points + # fig.plot(data=data, pen=f"faint,{color},-", label=f'"+g-1l+s0.15c"') + + fig.legend(S=3, position="JMR+JMR+o0.2c", box="+gwhite+p1p") + return fig
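
Example usage of the new plot_alongtrack function, a minimal sketch with made-up inputs (the synthetic DataFrame values, the rgtpair "0001_pt1", the regionname and the output filename are illustrative only; just the function signature and its defaults come from deepicedrain/vizplots.py above):

    import os

    import numpy as np
    import pandas as pd

    import deepicedrain

    # Build a small synthetic long-format table with two ICESat-2 cycles,
    # using the column names that plot_alongtrack expects by default
    # (x_atc, h_corr, utc_time, cycle_number).
    df = pd.DataFrame(
        data={
            "x_atc": np.tile(np.linspace(start=0, stop=5000, num=50), reps=2),
            "h_corr": np.concatenate([np.full(50, 100.0), np.full(50, 102.5)]),
            "utc_time": np.repeat(
                a=pd.to_datetime(["2019-05-01", "2019-08-01"]), repeats=50
            ),
            "cycle_number": np.repeat(a=[3, 4], repeats=50),
        }
    )

    # oldtonew=False lists the newest cycle first in the legend,
    # e.g. when a subglacial lake is filling up over time.
    fig = deepicedrain.plot_alongtrack(
        df=df, rgtpair="0001_pt1", regionname="Some Lake", oldtonew=False
    )
    os.makedirs(name="figures", exist_ok=True)
    fig.savefig(fname="figures/alongtrack_somelake_0001_pt1.png")
    fig.show()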