diff --git a/atlxi_dhdt.ipynb b/atlxi_dhdt.ipynb index 90c54ff..2b05397 100644 --- a/atlxi_dhdt.ipynb +++ b/atlxi_dhdt.ipynb @@ -1009,40 +1009,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Plot 2D along track view of\n", - "# Ice Surface Height Changes over Time\n", - "fig = pygmt.Figure()\n", - "# Setup map frame, title, axis annotations, etc\n", - "fig.basemap(\n", - " projection=\"X30c/10c\",\n", - " region=[df.x_atc.min(), df.x_atc.max(), df.h_corr.min(), df.h_corr.max()],\n", - " frame=[\n", - " rf'WSne+t\"ICESat-2 Change in Ice Surface Height over Time at {region.name}\"',\n", - " 'xaf+l\"Along track x (m)\"',\n", - " 'yaf+l\"Height (m)\"',\n", - " ],\n", - ")\n", - "fig.text(\n", - " text=f\"Reference Ground Track {rgt:04d}\", position=\"TC\", offset=\"jTC0c/0.2c\", V=\"q\"\n", - ")\n", - "# Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=7\n", - "cycle_colors = {3: \"#ff7f00\", 4: \"#984ea3\", 5: \"#4daf4a\", 6: \"#377eb8\", 7: \"#e41a1c\"}\n", - "for cycle, color in cycle_colors.items():\n", - " df_ = df.query(expr=\"cycle_number == @cycle\").copy()\n", - " if len(df_) > 0:\n", - " # Get x, y, time\n", - " data = np.column_stack(tup=(df_.x_atc, df_.h_corr))\n", - " time_nsec = df_.utc_time.mean()\n", - " time_sec = np.datetime_as_string(arr=time_nsec.to_datetime64(), unit=\"s\")\n", - " label = f'\"Cycle {cycle} at {time_sec}\"'\n", - "\n", - " # Plot data points\n", - " fig.plot(data=data, style=\"c0.05c\", color=color, label=label)\n", - " # Plot line connecting points\n", - " # fig.plot(data=data, pen=f\"faint,{color},-\", label=f'\"+g-1l+s0.15c\"')\n", - "\n", - "fig.legend(S=3, position=\"JMR+JMR+o0.2c\", box=\"+gwhite+p1p\")\n", - "fig.savefig(f\"figures/alongtrack_atl11_dh_{placename}_{rgt}.png\")\n", + "# Plot 2D along track view of Ice Surface Height Changes over Time\n", + "fig = deepicedrain.plot_alongtrack(df=df, rgtpair=f\"{rgt:04d}\", regionname=region.name)\n", + "fig.savefig(fname=f\"figures/alongtrack_{placename}_{rgt}.png\")\n", "fig.show()" ] }, diff --git a/atlxi_dhdt.py b/atlxi_dhdt.py index 5399181..756f014 100644 --- a/atlxi_dhdt.py +++ b/atlxi_dhdt.py @@ -454,40 +454,9 @@ df = df.query(expr="abs(dhdt_slope) > 0.2 & h_corr < 300") # %% -# Plot 2D along track view of -# Ice Surface Height Changes over Time -fig = pygmt.Figure() -# Setup map frame, title, axis annotations, etc -fig.basemap( - projection="X30c/10c", - region=[df.x_atc.min(), df.x_atc.max(), df.h_corr.min(), df.h_corr.max()], - frame=[ - rf'WSne+t"ICESat-2 Change in Ice Surface Height over Time at {region.name}"', - 'xaf+l"Along track x (m)"', - 'yaf+l"Height (m)"', - ], -) -fig.text( - text=f"Reference Ground Track {rgt:04d}", position="TC", offset="jTC0c/0.2c", V="q" -) -# Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=7 -cycle_colors = {3: "#ff7f00", 4: "#984ea3", 5: "#4daf4a", 6: "#377eb8", 7: "#e41a1c"} -for cycle, color in cycle_colors.items(): - df_ = df.query(expr="cycle_number == @cycle").copy() - if len(df_) > 0: - # Get x, y, time - data = np.column_stack(tup=(df_.x_atc, df_.h_corr)) - time_nsec = df_.utc_time.mean() - time_sec = np.datetime_as_string(arr=time_nsec.to_datetime64(), unit="s") - label = f'"Cycle {cycle} at {time_sec}"' - - # Plot data points - fig.plot(data=data, style="c0.05c", color=color, label=label) - # Plot line connecting points - # fig.plot(data=data, pen=f"faint,{color},-", label=f'"+g-1l+s0.15c"') - -fig.legend(S=3, position="JMR+JMR+o0.2c", box="+gwhite+p1p") 
-fig.savefig(f"figures/alongtrack_atl11_dh_{placename}_{rgt}.png") +# Plot 2D along track view of Ice Surface Height Changes over Time +fig = deepicedrain.plot_alongtrack(df=df, rgtpair=f"{rgt:04d}", regionname=region.name) +fig.savefig(fname=f"figures/alongtrack_{placename}_{rgt}.png") fig.show() # %% diff --git a/atlxi_lake.ipynb b/atlxi_lake.ipynb index f01c027..8ba3d5d 100644 --- a/atlxi_lake.ipynb +++ b/atlxi_lake.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "source": [ "# **ICESat-2 Active Subglacial Lakes in Antarctica**\n", "\n", @@ -25,26 +27,30 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [], "source": [ "import os\n", "\n", - "import numpy as np\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n", "\n", "import cudf\n", "import cuml\n", "import dask\n", "import dask.array\n", - "import deepicedrain\n", "import geopandas as gpd\n", - "import hvplot.cudf\n", + "import numpy as np\n", + "import pandas as pd\n", "import panel as pn\n", "import pygmt\n", "import scipy.spatial\n", "import shapely.geometry\n", "import tqdm\n", - "import zarr" + "import zarr\n", + "\n", + "import deepicedrain" ] }, { @@ -359,7 +365,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [], "source": [ "# Subset data to lake of interest\n", @@ -367,6 +375,48 @@ "df_lake: cudf.DataFrame = region.subset(data=df_dhdt)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Select a few Reference Ground tracks to look at\n", + "rgts: list = [int(rgt) for rgt in lake.refgtracks.split(\"|\")]\n", + "print(f\"Looking at Reference Ground Tracks: {rgts}\")\n", + "os.makedirs(name=f\"figures/{placename}\", exist_ok=True)\n", + "\n", + "track_dict: dict = {}\n", + "rgt_groups = df_lake.groupby(by=\"referencegroundtrack\")\n", + "for rgt, df_rgt_wide in tqdm.tqdm(rgt_groups, total=len(rgt_groups.groups.keys())):\n", + " df_rgt: pd.DataFrame = deepicedrain.wide_to_long(\n", + " df=df_rgt_wide.to_pandas(),\n", + " stubnames=[\"h_corr\", \"utc_time\"],\n", + " j=\"cycle_number\",\n", + " )\n", + "\n", + " # Split one referencegroundtrack into 3 laser pair tracks pt1, pt2, pt3\n", + " df_rgt[\"pairtrack\"]: pd.Series = pd.cut(\n", + " x=df_rgt.y_atc, bins=[-np.inf, -100, 100, np.inf], labels=(\"pt1\", \"pt2\", \"pt3\")\n", + " )\n", + " pt_groups = df_rgt.groupby(by=\"pairtrack\")\n", + " for pairtrack, df_ in pt_groups:\n", + " if len(df_) > 0:\n", + " rgtpair = f\"{rgt:04d}_{pairtrack}\"\n", + " track_dict[rgtpair] = df_\n", + "\n", + " # Transect plot along a reference ground track\n", + " fig = deepicedrain.plot_alongtrack(\n", + " df=df_,\n", + " rgtpair=rgtpair,\n", + " regionname=region.name,\n", + " oldtonew=draining,\n", + " )\n", + " fig.savefig(\n", + " fname=f\"figures/{placename}/alongtrack_{placename}_{rgtpair}.png\"\n", + " )" + ] + }, { "cell_type": "markdown", "metadata": { @@ -477,7 +527,7 @@ "# Calculate crossover error\n", "df[\"h_X\"]: pd.Series = df.h_2 - df.h_1 # crossover error (i.e. height difference)\n", "df[\"t_D\"]: pd.Series = df.t_2 - df.t_1 # elapsed time in ns (i.e. 
time difference)\n", - "ns_in_yr: int = (365.25 * 24 * 60 * 60 * 1_000_000_000) # nanoseconds in a year\n", + "ns_in_yr: int = 365.25 * 24 * 60 * 60 * 1_000_000_000 # nanoseconds in a year\n", "df[\"dhdt\"]: pd.Series = df.h_X / (df.t_D.astype(np.int64) / ns_in_yr)" ] }, diff --git a/atlxi_lake.py b/atlxi_lake.py index 68ff520..9106848 100644 --- a/atlxi_lake.py +++ b/atlxi_lake.py @@ -32,18 +32,19 @@ # we will use state of the art GPU algorithms enabled by RAPIDS AI libraries, # or parallelize the processing across our HPC's many CPU cores using Dask. + # %% import os -import numpy as np +os.environ["CUDA_VISIBLE_DEVICES"] = "1" import cudf import cuml import dask import dask.array -import deepicedrain import geopandas as gpd -import hvplot.cudf +import numpy as np +import pandas as pd import panel as pn import pygmt import scipy.spatial @@ -51,6 +52,8 @@ import tqdm import zarr +import deepicedrain + # %% [markdown] # # Data Preparation @@ -242,6 +245,7 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series: f"ATLXI/df_dhdt_{placename.lower()}.parquet" ) + # %% # Antarctic subglacial lake polygons with EPSG:3031 coordinates antarctic_lakes: gpd.GeoDataFrame = gpd.read_file( @@ -268,6 +272,42 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series: df_lake: cudf.DataFrame = region.subset(data=df_dhdt) +# %% +# Select a few Reference Ground tracks to look at +rgts: list = [int(rgt) for rgt in lake.refgtracks.split("|")] +print(f"Looking at Reference Ground Tracks: {rgts}") +os.makedirs(name=f"figures/{placename}", exist_ok=True) + +track_dict: dict = {} +rgt_groups = df_lake.groupby(by="referencegroundtrack") +for rgt, df_rgt_wide in tqdm.tqdm(rgt_groups, total=len(rgt_groups.groups.keys())): + df_rgt: pd.DataFrame = deepicedrain.wide_to_long( + df=df_rgt_wide.to_pandas(), + stubnames=["h_corr", "utc_time"], + j="cycle_number", + ) + + # Split one referencegroundtrack into 3 laser pair tracks pt1, pt2, pt3 + df_rgt["pairtrack"]: pd.Series = pd.cut( + x=df_rgt.y_atc, bins=[-np.inf, -100, 100, np.inf], labels=("pt1", "pt2", "pt3") + ) + pt_groups = df_rgt.groupby(by="pairtrack") + for pairtrack, df_ in pt_groups: + if len(df_) > 0: + rgtpair = f"{rgt:04d}_{pairtrack}" + track_dict[rgtpair] = df_ + + # Transect plot along a reference ground track + fig = deepicedrain.plot_alongtrack( + df=df_, + rgtpair=rgtpair, + regionname=region.name, + oldtonew=draining, + ) + fig.savefig( + fname=f"figures/{placename}/alongtrack_{placename}_{rgtpair}.png" + ) + # %% [markdown] # # Crossover Track Analysis # @@ -348,7 +388,7 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series: # Calculate crossover error df["h_X"]: pd.Series = df.h_2 - df.h_1 # crossover error (i.e. height difference) df["t_D"]: pd.Series = df.t_2 - df.t_1 # elapsed time in ns (i.e. time difference) -ns_in_yr: int = (365.25 * 24 * 60 * 60 * 1_000_000_000) # nanoseconds in a year +ns_in_yr: int = 365.25 * 24 * 60 * 60 * 1_000_000_000 # nanoseconds in a year df["dhdt"]: pd.Series = df.h_X / (df.t_D.astype(np.int64) / ns_in_yr) # %% @@ -406,7 +446,6 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series: # %% # Tidy up dataframe first using pd.wide_to_long # I.e. convert 't_1', 't_2', 'h_1', 'h_2' columns into just 't' and 'h'. 
-df["id"] = df.index df_th: pd.DataFrame = deepicedrain.wide_to_long( df=df[["track1_track2", "x", "y", "t_1", "t_2", "h_1", "h_2"]], stubnames=["t", "h"], diff --git a/deepicedrain/README.md b/deepicedrain/README.md index e36e3ff..7f4a477 100644 --- a/deepicedrain/README.md +++ b/deepicedrain/README.md @@ -5,6 +5,7 @@ Contents: - :artificial_satellite: atlas_catalog.yaml - [intake](https://intake.readthedocs.io) data catalog for accessing ICESat-2 ATLAS datasets - icesat2atlasdownloader - Download Antarctic ICESat-2 ATLAS products from [NSIDC](https://nsidc.org/data/ICESat-2) - icesat2atl06 - Reads in ICESat-2 ATL06 data into an xarray.Dataset + - icesat2dhdt - Preprocessed ICESat-2 height change over time (dhdt) data in a columnar format - test_data - Sample ICESat-2 datasets for testing purposes - :1234: deltamath.py - Mathematical functions for calculating delta changes of some physical unit @@ -20,6 +21,8 @@ Contents: - :card_file_box: extraload.py - Convenience functions for extracting, transforming and loading data - array_to_dataframe - Turns a 1D/2D numpy/dask array into a tidy pandas/dask dataframe table - ndarray_to_parquet - Turns an n-dimensional xarray/zarr array into an a parquet columnar format + - wide_to_long - Turns a pandas dataframe table with many columns into one with many rows - :world_map: vizplots.py - Makes interactive dashboard plots and publication quality figures - IceSat2Explorer - Dashboard for interacting with ICESat-2 point clouds on a 2D map + - plot_alongtrack - Makes a 2D along track figure of height measurements taken at different cycle times diff --git a/deepicedrain/__init__.py b/deepicedrain/__init__.py index cfffbb9..2ac864a 100644 --- a/deepicedrain/__init__.py +++ b/deepicedrain/__init__.py @@ -1,8 +1,9 @@ import importlib.resources import logging -import deepicedrain import intake + +import deepicedrain from deepicedrain.deltamath import calculate_delta, nan_linregress, nanptp from deepicedrain.extraload import array_to_dataframe, ndarray_to_parquet, wide_to_long from deepicedrain.spatiotemporal import ( @@ -11,7 +12,7 @@ lonlat_to_xy, point_in_polygon_gpu, ) -from deepicedrain.vizplots import IceSat2Explorer +from deepicedrain.vizplots import IceSat2Explorer, plot_alongtrack __version__: str = "0.2.1" diff --git a/deepicedrain/extraload.py b/deepicedrain/extraload.py index 4470713..0f72920 100644 --- a/deepicedrain/extraload.py +++ b/deepicedrain/extraload.py @@ -135,7 +135,7 @@ def wide_to_long( index (the 'j' variable), while dropping NaN values too! Documentation for input arguments are the same as pd.wide_to_long. This - convenience functions just uses different default arguments for 'i' and + convenience function just uses different default arguments for 'i' and 'sep'. """ df[i] = df.index diff --git a/deepicedrain/tests/test_vizplots.py b/deepicedrain/tests/test_vizplots.py new file mode 100644 index 0000000..6c54276 --- /dev/null +++ b/deepicedrain/tests/test_vizplots.py @@ -0,0 +1,59 @@ +""" +Tests that various visualizations can be made to appear! +""" +import os +import tempfile + +import pandas as pd +import pytest +import xarray as xr +import pygmt.helpers.testing + +from deepicedrain import ( + catalog, + deltatime_to_utctime, + ndarray_to_parquet, + plot_alongtrack, + wide_to_long, +) + + +@pytest.fixture(scope="module", name="dataframe") +def fixture_dataframe(): + """ + Loads the sample ICESat-2 ATL11 data, and processes it into an suitable + pandas.DataFrame format. 
+    """
+    dataset: xr.Dataset = catalog.test_data.atl11_test_case.to_dask()
+    dataset["utc_time"] = deltatime_to_utctime(dataarray=dataset.delta_time)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        df: pd.DataFrame = ndarray_to_parquet(
+            ndarray=dataset,
+            parquetpath=os.path.join(tmpdir, "temp.parquet"),
+            variables=["longitude", "latitude", "h_corr", "utc_time"],
+            use_deprecated_int96_timestamps=True,
+        )
+    dataframe: pd.DataFrame = wide_to_long(
+        df=df, stubnames=["h_corr", "utc_time"], j="cycle_number"
+    )
+    return dataframe
+
+
+@pygmt.helpers.testing.check_figures_equal()
+def test_plot_alongtrack(dataframe):
+    """
+    Tests that a 2D along track plot figure can be produced. Also makes sure that
+    the default for oldtonew is True (i.e. legend shows Cycle 1 before Cycle 2).
+    """
+    kwargs = dict(
+        df=dataframe,
+        rgtpair="788_pt2",
+        regionname="Greenland",
+        x_var="longitude",
+        spacing="0.1/5",
+    )
+    fig_ref = plot_alongtrack(**kwargs, oldtonew=True)
+    fig_test = plot_alongtrack(**kwargs)
+
+    return fig_ref, fig_test
diff --git a/deepicedrain/vizplots.py b/deepicedrain/vizplots.py
index f5ada8a..944da4c 100644
--- a/deepicedrain/vizplots.py
+++ b/deepicedrain/vizplots.py
@@ -6,12 +6,13 @@
 import os
 import warnings
 
-import numpy as np
-
 import holoviews as hv
 import intake
+import numpy as np
+import pandas as pd
 import panel as pn
 import param
+import pygmt
 
 warnings.filterwarnings(
     action="ignore",
@@ -132,3 +133,106 @@ def widgets(self):
             pn.Column(_widgets[2], _widgets[3], align="center"),
             pn.Column(_widgets[4], _widgets[5], align="center"),
         )
+
+
+def plot_alongtrack(
+    df: pd.DataFrame,
+    rgtpair: str,
+    regionname: str,
+    x_var: str = "x_atc",
+    y_var: str = "h_corr",
+    time_var: str = "utc_time",
+    cycle_var: str = "cycle_number",
+    spacing: str = "1000/5",
+    oldtonew: bool = True,
+) -> pygmt.Figure:
+    """
+    Plot 2D along track view of Ice Surface Height Changes over Time.
+    Uses PyGMT to produce the figure.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        A table containing the ICESat-2 track data from multiple cycles. It
+        should ideally have columns called 'x_atc', 'h_corr', 'utc_time' and
+        'cycle_number'.
+    x_var : str
+        The x-dimension column name to use from the table data, plotted
+        on the horizontal x-axis. Default is 'x_atc'.
+    y_var : str
+        The y-dimension column name to use from the table data, plotted
+        on the vertical y-axis. Default is 'h_corr'.
+    time_var : str
+        The time-dimension column name to use from the table data, used to
+        calculate the mean datetime for each track in every cycle. Default is
+        'utc_time'.
+    cycle_var : str
+        The column name from the table which is used to determine which time
+        cycle each row/observation falls into. Default is 'cycle_number'.
+    spacing : str
+        Provided as 'dx/dy' increments; this is passed directly to `pygmt.info`
+        and used to round up and down the x and y axis limits for a nicer plot
+        frame. Default is '1000/5'.
+    oldtonew : bool
+        Determines the plot order (True: Cycle 1 -> Cycle n; False: Cycle n ->
+        Cycle 1), useful when you want the legend to go one way or the other.
+        For example, the default `oldtonew=True` is recommended when plotting
+        decreasing elevation over time (i.e. lake draining). Set to False
+        instead to reverse the order, recommended when plotting increasing
+        elevation over time (i.e. lake filling).
+ + Returns + ------- + fig : pygmt.Figure + A pygmt Figure instance containing the along track plot which can be + viewed using fig.show() or saved to a file using fig.savefig() + """ + fig = pygmt.Figure() + # Setup map frame, title, axis annotations, etc + fig.basemap( + projection="X30c/10c", + region=pygmt.info(table=df[[x_var, y_var]], spacing=spacing), + frame=[ + rf'WSne+t"ICESat-2 Change in Ice Surface Height over Time at {regionname}"', + 'xaf+l"Along track x (m)"', + 'yaf+l"Height (m)"', + ], + ) + fig.text( + text=f"Reference Ground Track {rgtpair}", + position="TC", + offset="jTC0c/0.2c", + V="q", + ) + # Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=9 + cycle_colors: dict = { + 1: "#999999", + 2: "#f781bf", + 3: "#a65628", + 4: "#ffff33", + 5: "#ff7f00", + 6: "#984ea3", + 7: "#4daf4a", + 8: "#377eb8", + 9: "#e41a1c", + } + # Choose only cycles that need to be plotted, reverse order if requested + cycles: list = sorted(df[cycle_var].unique(), reverse=not oldtonew) + cycle_colors: dict = {cycle: cycle_colors[cycle] for cycle in cycles} + + # For each cycle, plot the height values (y_var) along the track (x_var) + for cycle, color in cycle_colors.items(): + df_ = df.query(expr=f"{cycle_var} == @cycle").copy() + # Get x, y, time + data = np.column_stack(tup=(df_[x_var], df_[y_var])) + time_nsec = df_[time_var].mean() + time_sec = np.datetime_as_string(arr=time_nsec.to_datetime64(), unit="s") + label = f'"Cycle {cycle} at {time_sec}"' + + # Plot data points + fig.plot(data=data, style="c0.05c", color=color, label=label) + # Plot line connecting points + # fig.plot(data=data, pen=f"faint,{color},-", label=f'"+g-1l+s0.15c"') + + fig.legend(S=3, position="JMR+JMR+o0.2c", box="+gwhite+p1p") + return fig
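
This patch moves the inline PyGMT plotting code into `deepicedrain.plot_alongtrack` and pairs it with `deepicedrain.wide_to_long`. Below is a minimal usage sketch of that new API, assuming the patched `deepicedrain` package is importable and a wide-format parquet file with 'x_atc', 'h_corr_X' and 'utc_time_X' columns per cycle exists; the file path, region name and reference ground track pair are illustrative placeholders, not values taken from this patch.

    import pandas as pd

    import deepicedrain

    # Wide-format table with one h_corr_X/utc_time_X column pair per ICESat-2 cycle,
    # e.g. as written out by ndarray_to_parquet in the atlxi_dhdt.py workflow.
    # Placeholder path for illustration only.
    df_wide: pd.DataFrame = pd.read_parquet("ATLXI/df_dhdt_some_lake.parquet")

    # Reshape to long format: one row per (cycle_number, h_corr, utc_time) observation
    df_long: pd.DataFrame = deepicedrain.wide_to_long(
        df=df_wide, stubnames=["h_corr", "utc_time"], j="cycle_number"
    )

    # 2D along track transect of ice surface height change over time
    fig = deepicedrain.plot_alongtrack(
        df=df_long,
        rgtpair="0135_pt2",  # placeholder reference ground track / laser pair track
        regionname="Some Subglacial Lake",  # placeholder region name
        oldtonew=False,  # newest cycle listed first in the legend, e.g. for a filling lake
    )
    fig.savefig(fname="figures/alongtrack_some_lake_0135_pt2.png")
    fig.show()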