📈 2D along track plots with reversible legend
The icy water may be draining, but we're still keeping things DRY! Chucking all of the along track code (originally from atlxi_dhdt.ipynb, sitting secretly in atlxi_lake.ipynb for a while, now refactored to near perfection) into a proper, tested visualization function! The abstraction makes it easier to generate along track plots when looping through different reference ground tracks (0001 to 1387) and pair tracks (pt1, pt2, pt3). Included a boolean oldtonew flag to allow flipping the legend, useful e.g. when a lake is filling up over time and we want Cycle 7 to sit above Cycle 6.

To be honest, test_vizplots.py could almost be treated as an integration/behaviour-driven development test, but I haven't had time to do that properly. Also took the opportunity to update some documentation in deepicedrain/README.md, adding the icesat2dhdt catalog entry and the wide_to_long function that were missed out before.
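
Roughly speaking, the refactored function looks something like the sketch below, pieced together from the inline code this commit removes from atlxi_dhdt.ipynb/.py. This is a sketch only: the actual implementation lives in deepicedrain/vizplots.py, and the way the oldtonew flag is handled here (reversing the plotting order, since GMT stacks legend entries in plot order) is an assumption.

import numpy as np
import pandas as pd
import pygmt


def plot_alongtrack(
    df: pd.DataFrame, rgtpair: str, regionname: str, oldtonew: bool = True
) -> pygmt.Figure:
    """Sketch of a 2D along track plot of height measurements over cycles."""
    fig = pygmt.Figure()
    # Setup map frame, title, axis annotations, etc
    fig.basemap(
        projection="X30c/10c",
        region=[df.x_atc.min(), df.x_atc.max(), df.h_corr.min(), df.h_corr.max()],
        frame=[
            rf'WSne+t"ICESat-2 Change in Ice Surface Height over Time at {regionname}"',
            'xaf+l"Along track x (m)"',
            'yaf+l"Height (m)"',
        ],
    )
    fig.text(
        text=f"Reference Ground Track {rgtpair}", position="TC", offset="jTC0c/0.2c"
    )
    # Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=7
    cycle_colors: dict = {
        3: "#ff7f00", 4: "#984ea3", 5: "#4daf4a", 6: "#377eb8", 7: "#e41a1c"
    }
    # Assumed oldtonew handling: legend entries stack in plot order, so
    # reversing the cycle order flips the legend (e.g. puts Cycle 7 above
    # Cycle 6 when a lake is filling up over time)
    cycles: list = sorted(cycle_colors.keys(), reverse=not oldtonew)
    for cycle in cycles:
        df_ = df.query(expr="cycle_number == @cycle").copy()
        if len(df_) > 0:
            # Get x, y, and mean acquisition time for this cycle
            data = np.column_stack(tup=(df_.x_atc, df_.h_corr))
            time_sec = np.datetime_as_string(
                arr=df_.utc_time.mean().to_datetime64(), unit="s"
            )
            # Plot data points
            fig.plot(
                data=data,
                style="c0.05c",
                color=cycle_colors[cycle],
                label=f'"Cycle {cycle} at {time_sec}"',
            )
    fig.legend(S=3, position="JMR+JMR+o0.2c", box="+gwhite+p1p")
    return fig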
weiji14 committed Sep 15, 2020
1 parent ef939bf commit f5c586a
Showing 9 changed files with 280 additions and 86 deletions.
37 changes: 3 additions & 34 deletions atlxi_dhdt.ipynb
@@ -1009,40 +1009,9 @@
"metadata": {},
"outputs": [],
"source": [
"# Plot 2D along track view of\n",
"# Ice Surface Height Changes over Time\n",
"fig = pygmt.Figure()\n",
"# Setup map frame, title, axis annotations, etc\n",
"fig.basemap(\n",
" projection=\"X30c/10c\",\n",
" region=[df.x_atc.min(), df.x_atc.max(), df.h_corr.min(), df.h_corr.max()],\n",
" frame=[\n",
" rf'WSne+t\"ICESat-2 Change in Ice Surface Height over Time at {region.name}\"',\n",
" 'xaf+l\"Along track x (m)\"',\n",
" 'yaf+l\"Height (m)\"',\n",
" ],\n",
")\n",
"fig.text(\n",
" text=f\"Reference Ground Track {rgt:04d}\", position=\"TC\", offset=\"jTC0c/0.2c\", V=\"q\"\n",
")\n",
"# Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=7\n",
"cycle_colors = {3: \"#ff7f00\", 4: \"#984ea3\", 5: \"#4daf4a\", 6: \"#377eb8\", 7: \"#e41a1c\"}\n",
"for cycle, color in cycle_colors.items():\n",
" df_ = df.query(expr=\"cycle_number == @cycle\").copy()\n",
" if len(df_) > 0:\n",
" # Get x, y, time\n",
" data = np.column_stack(tup=(df_.x_atc, df_.h_corr))\n",
" time_nsec = df_.utc_time.mean()\n",
" time_sec = np.datetime_as_string(arr=time_nsec.to_datetime64(), unit=\"s\")\n",
" label = f'\"Cycle {cycle} at {time_sec}\"'\n",
"\n",
" # Plot data points\n",
" fig.plot(data=data, style=\"c0.05c\", color=color, label=label)\n",
" # Plot line connecting points\n",
" # fig.plot(data=data, pen=f\"faint,{color},-\", label=f'\"+g-1l+s0.15c\"')\n",
"\n",
"fig.legend(S=3, position=\"JMR+JMR+o0.2c\", box=\"+gwhite+p1p\")\n",
"fig.savefig(f\"figures/alongtrack_atl11_dh_{placename}_{rgt}.png\")\n",
"# Plot 2D along track view of Ice Surface Height Changes over Time\n",
"fig = deepicedrain.plot_alongtrack(df=df, rgtpair=f\"{rgt:04d}\", regionname=region.name)\n",
"fig.savefig(fname=f\"figures/alongtrack_{placename}_{rgt}.png\")\n",
"fig.show()"
]
},
37 changes: 3 additions & 34 deletions atlxi_dhdt.py
@@ -454,40 +454,9 @@
df = df.query(expr="abs(dhdt_slope) > 0.2 & h_corr < 300")

# %%
# Plot 2D along track view of
# Ice Surface Height Changes over Time
fig = pygmt.Figure()
# Setup map frame, title, axis annotations, etc
fig.basemap(
projection="X30c/10c",
region=[df.x_atc.min(), df.x_atc.max(), df.h_corr.min(), df.h_corr.max()],
frame=[
rf'WSne+t"ICESat-2 Change in Ice Surface Height over Time at {region.name}"',
'xaf+l"Along track x (m)"',
'yaf+l"Height (m)"',
],
)
fig.text(
text=f"Reference Ground Track {rgt:04d}", position="TC", offset="jTC0c/0.2c", V="q"
)
# Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=7
cycle_colors = {3: "#ff7f00", 4: "#984ea3", 5: "#4daf4a", 6: "#377eb8", 7: "#e41a1c"}
for cycle, color in cycle_colors.items():
df_ = df.query(expr="cycle_number == @cycle").copy()
if len(df_) > 0:
# Get x, y, time
data = np.column_stack(tup=(df_.x_atc, df_.h_corr))
time_nsec = df_.utc_time.mean()
time_sec = np.datetime_as_string(arr=time_nsec.to_datetime64(), unit="s")
label = f'"Cycle {cycle} at {time_sec}"'

# Plot data points
fig.plot(data=data, style="c0.05c", color=color, label=label)
# Plot line connecting points
# fig.plot(data=data, pen=f"faint,{color},-", label=f'"+g-1l+s0.15c"')

fig.legend(S=3, position="JMR+JMR+o0.2c", box="+gwhite+p1p")
fig.savefig(f"figures/alongtrack_atl11_dh_{placename}_{rgt}.png")
# Plot 2D along track view of Ice Surface Height Changes over Time
fig = deepicedrain.plot_alongtrack(df=df, rgtpair=f"{rgt:04d}", regionname=region.name)
fig.savefig(fname=f"figures/alongtrack_{placename}_{rgt}.png")
fig.show()

# %%
66 changes: 58 additions & 8 deletions atlxi_lake.ipynb
@@ -2,7 +2,9 @@
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"source": [
"# **ICESat-2 Active Subglacial Lakes in Antarctica**\n",
"\n",
@@ -25,26 +27,30 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import numpy as np\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n",
"\n",
"import cudf\n",
"import cuml\n",
"import dask\n",
"import dask.array\n",
"import deepicedrain\n",
"import geopandas as gpd\n",
"import hvplot.cudf\n",
"import numpy as np\n",
"import pandas as pd\n",
"import panel as pn\n",
"import pygmt\n",
"import scipy.spatial\n",
"import shapely.geometry\n",
"import tqdm\n",
"import zarr"
"import zarr\n",
"\n",
"import deepicedrain"
]
},
{
@@ -359,14 +365,58 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"# Subset data to lake of interest\n",
"placename: str = region.name.lower().replace(\" \", \"_\")\n",
"df_lake: cudf.DataFrame = region.subset(data=df_dhdt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select a few Reference Ground tracks to look at\n",
"rgts: list = [int(rgt) for rgt in lake.refgtracks.split(\"|\")]\n",
"print(f\"Looking at Reference Ground Tracks: {rgts}\")\n",
"os.makedirs(name=f\"figures/{placename}\", exist_ok=True)\n",
"\n",
"track_dict: dict = {}\n",
"rgt_groups = df_lake.groupby(by=\"referencegroundtrack\")\n",
"for rgt, df_rgt_wide in tqdm.tqdm(rgt_groups, total=len(rgt_groups.groups.keys())):\n",
" df_rgt: pd.DataFrame = deepicedrain.wide_to_long(\n",
" df=df_rgt_wide.to_pandas(),\n",
" stubnames=[\"h_corr\", \"utc_time\"],\n",
" j=\"cycle_number\",\n",
" )\n",
"\n",
" # Split one referencegroundtrack into 3 laser pair tracks pt1, pt2, pt3\n",
" df_rgt[\"pairtrack\"]: pd.Series = pd.cut(\n",
" x=df_rgt.y_atc, bins=[-np.inf, -100, 100, np.inf], labels=(\"pt1\", \"pt2\", \"pt3\")\n",
" )\n",
" pt_groups = df_rgt.groupby(by=\"pairtrack\")\n",
" for pairtrack, df_ in pt_groups:\n",
" if len(df_) > 0:\n",
" rgtpair = f\"{rgt:04d}_{pairtrack}\"\n",
" track_dict[rgtpair] = df_\n",
"\n",
" # Transect plot along a reference ground track\n",
" fig = deepicedrain.plot_alongtrack(\n",
" df=df_,\n",
" rgtpair=rgtpair,\n",
" regionname=region.name,\n",
" oldtonew=draining,\n",
" )\n",
" fig.savefig(\n",
" fname=f\"figures/{placename}/alongtrack_{placename}_{rgtpair}.png\"\n",
" )"
]
},
{
"cell_type": "markdown",
"metadata": {
@@ -477,7 +527,7 @@
"# Calculate crossover error\n",
"df[\"h_X\"]: pd.Series = df.h_2 - df.h_1 # crossover error (i.e. height difference)\n",
"df[\"t_D\"]: pd.Series = df.t_2 - df.t_1 # elapsed time in ns (i.e. time difference)\n",
"ns_in_yr: int = (365.25 * 24 * 60 * 60 * 1_000_000_000) # nanoseconds in a year\n",
"ns_in_yr: int = 365.25 * 24 * 60 * 60 * 1_000_000_000 # nanoseconds in a year\n",
"df[\"dhdt\"]: pd.Series = df.h_X / (df.t_D.astype(np.int64) / ns_in_yr)"
]
},
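
A quick sanity check on the crossover dhdt arithmetic in that last hunk: the time difference t_D is a timedelta in nanoseconds, so dividing its int64 representation by ns_in_yr converts it to years before dividing the height difference by it. A tiny worked example with made-up numbers:

import numpy as np
import pandas as pd

# A height difference of +0.5 m measured half a year apart should give ~1 m/yr
h_X = 0.5  # crossover height difference in metres
t_D = pd.Timedelta(days=182.625)  # half of 365.25 days, as elapsed time
ns_in_yr = 365.25 * 24 * 60 * 60 * 1_000_000_000  # nanoseconds in a year
dhdt = h_X / (t_D.to_timedelta64().astype(np.int64) / ns_in_yr)
print(dhdt)  # 1.0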
49 changes: 44 additions & 5 deletions atlxi_lake.py
@@ -32,25 +32,28 @@
# we will use state of the art GPU algorithms enabled by RAPIDS AI libraries,
# or parallelize the processing across our HPC's many CPU cores using Dask.


# %%
import os

import numpy as np
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import cudf
import cuml
import dask
import dask.array
import deepicedrain
import geopandas as gpd
import hvplot.cudf
import numpy as np
import pandas as pd
import panel as pn
import pygmt
import scipy.spatial
import shapely.geometry
import tqdm
import zarr

import deepicedrain

# %% [markdown]
# # Data Preparation

@@ -242,6 +245,7 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series:
f"ATLXI/df_dhdt_{placename.lower()}.parquet"
)


# %%
# Antarctic subglacial lake polygons with EPSG:3031 coordinates
antarctic_lakes: gpd.GeoDataFrame = gpd.read_file(
@@ -268,6 +272,42 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series:
df_lake: cudf.DataFrame = region.subset(data=df_dhdt)


# %%
# Select a few Reference Ground tracks to look at
rgts: list = [int(rgt) for rgt in lake.refgtracks.split("|")]
print(f"Looking at Reference Ground Tracks: {rgts}")
os.makedirs(name=f"figures/{placename}", exist_ok=True)

track_dict: dict = {}
rgt_groups = df_lake.groupby(by="referencegroundtrack")
for rgt, df_rgt_wide in tqdm.tqdm(rgt_groups, total=len(rgt_groups.groups.keys())):
df_rgt: pd.DataFrame = deepicedrain.wide_to_long(
df=df_rgt_wide.to_pandas(),
stubnames=["h_corr", "utc_time"],
j="cycle_number",
)

# Split one referencegroundtrack into 3 laser pair tracks pt1, pt2, pt3
df_rgt["pairtrack"]: pd.Series = pd.cut(
x=df_rgt.y_atc, bins=[-np.inf, -100, 100, np.inf], labels=("pt1", "pt2", "pt3")
)
pt_groups = df_rgt.groupby(by="pairtrack")
for pairtrack, df_ in pt_groups:
if len(df_) > 0:
rgtpair = f"{rgt:04d}_{pairtrack}"
track_dict[rgtpair] = df_

# Transect plot along a reference ground track
fig = deepicedrain.plot_alongtrack(
df=df_,
rgtpair=rgtpair,
regionname=region.name,
oldtonew=draining,
)
fig.savefig(
fname=f"figures/{placename}/alongtrack_{placename}_{rgtpair}.png"
)

# %% [markdown]
# # Crossover Track Analysis
#
@@ -348,7 +388,7 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series:
# Calculate crossover error
df["h_X"]: pd.Series = df.h_2 - df.h_1 # crossover error (i.e. height difference)
df["t_D"]: pd.Series = df.t_2 - df.t_1 # elapsed time in ns (i.e. time difference)
ns_in_yr: int = (365.25 * 24 * 60 * 60 * 1_000_000_000) # nanoseconds in a year
ns_in_yr: int = 365.25 * 24 * 60 * 60 * 1_000_000_000 # nanoseconds in a year
df["dhdt"]: pd.Series = df.h_X / (df.t_D.astype(np.int64) / ns_in_yr)

# %%
@@ -406,7 +446,6 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series:
# %%
# Tidy up dataframe first using pd.wide_to_long
# I.e. convert 't_1', 't_2', 'h_1', 'h_2' columns into just 't' and 'h'.
df["id"] = df.index
df_th: pd.DataFrame = deepicedrain.wide_to_long(
df=df[["track1_track2", "x", "y", "t_1", "t_2", "h_1", "h_2"]],
stubnames=["t", "h"],
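
A note on the pd.cut call in the loop above: each ICESat-2 reference ground track carries three laser pair tracks spaced roughly 3.3 km apart across track, with y_atc centred near the middle pair, so cutting at +/-100 m cleanly separates pt1, pt2 and pt3. A tiny sketch with made-up y_atc values:

import numpy as np
import pandas as pd

# Hypothetical across-track coordinates: the outer pair tracks sit kilometres
# away from the middle one, so +/-100 m bins are a generous separator
y_atc = pd.Series([-3300.5, -12.7, 55.1, 3295.9])
pairtrack = pd.cut(
    x=y_atc, bins=[-np.inf, -100, 100, np.inf], labels=("pt1", "pt2", "pt3")
)
print(pairtrack.tolist())  # ['pt1', 'pt2', 'pt2', 'pt3']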
3 changes: 3 additions & 0 deletions deepicedrain/README.md
@@ -5,6 +5,7 @@ Contents:
- :artificial_satellite: atlas_catalog.yaml - [intake](https://intake.readthedocs.io) data catalog for accessing ICESat-2 ATLAS datasets
- icesat2atlasdownloader - Download Antarctic ICESat-2 ATLAS products from [NSIDC](https://nsidc.org/data/ICESat-2)
- icesat2atl06 - Reads in ICESat-2 ATL06 data into an xarray.Dataset
- icesat2dhdt - Preprocessed ICESat-2 height change over time (dhdt) data in a columnar format
- test_data - Sample ICESat-2 datasets for testing purposes

- :1234: deltamath.py - Mathematical functions for calculating delta changes of some physical unit
@@ -20,6 +21,8 @@ Contents:
- :card_file_box: extraload.py - Convenience functions for extracting, transforming and loading data
- array_to_dataframe - Turns a 1D/2D numpy/dask array into a tidy pandas/dask dataframe table
- ndarray_to_parquet - Turns an n-dimensional xarray/zarr array into a parquet columnar format
- wide_to_long - Turns a pandas dataframe table with many columns into one with many rows

- :world_map: vizplots.py - Makes interactive dashboard plots and publication quality figures
- IceSat2Explorer - Dashboard for interacting with ICESat-2 point clouds on a 2D map
- plot_alongtrack - Makes a 2D along track figure of height measurements taken at different cycle times
5 changes: 3 additions & 2 deletions deepicedrain/__init__.py
@@ -1,8 +1,9 @@
import importlib.resources
import logging

import deepicedrain
import intake

import deepicedrain
from deepicedrain.deltamath import calculate_delta, nan_linregress, nanptp
from deepicedrain.extraload import array_to_dataframe, ndarray_to_parquet, wide_to_long
from deepicedrain.spatiotemporal import (
@@ -11,7 +12,7 @@
lonlat_to_xy,
point_in_polygon_gpu,
)
from deepicedrain.vizplots import IceSat2Explorer
from deepicedrain.vizplots import IceSat2Explorer, plot_alongtrack

__version__: str = "0.2.1"

2 changes: 1 addition & 1 deletion deepicedrain/extraload.py
@@ -135,7 +135,7 @@ def wide_to_long(
index (the 'j' variable), while dropping NaN values too!
Documentation for input arguments are the same as pd.wide_to_long. This
convenience functions just uses different default arguments for 'i' and
convenience function just uses different default arguments for 'i' and
'sep'.
"""
df[i] = df.index
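
Since the wide_to_long docstring got a touch-up above, here's a usage sketch as well. The column names and values are made up, and it assumes the wrapper defaults to sep='_' (melting h_corr_X/utc_time_X column pairs into long rows), matching how it's called in atlxi_lake.py:

import pandas as pd

import deepicedrain

# Wide format: one 'h_corr_X'/'utc_time_X' column pair per ICESat-2 cycle
df_wide = pd.DataFrame(
    data={
        "x_atc": [100.0, 200.0],
        "h_corr_6": [1816.0, 1821.0],
        "h_corr_7": [1818.5, None],  # NaN rows get dropped in the melt
        "utc_time_6": pd.to_datetime(["2020-05-13", "2020-05-13"]),
        "utc_time_7": pd.to_datetime(["2020-08-11", "2020-08-11"]),
    }
)
# Long format: one row per (point, cycle_number), with plain 'h_corr' and
# 'utc_time' columns ready for deepicedrain.plot_alongtrack
df_long = deepicedrain.wide_to_long(
    df=df_wide, stubnames=["h_corr", "utc_time"], j="cycle_number"
)
print(df_long)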
