📈 2D along track plots with reversible legend
The icy water may be draining, but we're still keeping things DRY! Chucking all of the along track code (originally from atlxi_dhdt.ipynb, sitting secretly in atlxi_lake.ipynb for a while, now refactored to near perfection) into a proper, tested visualization function! The abstraction makes it easier to generate along track plots when looping through different reference ground tracks (0001 to 1387) and pair tracks (pt1, pt2, pt3). Included a boolean oldtonew flag to allow flipping the legend, useful e.g. when a lake is filling up over time and we want Cycle 7 to sit above Cycle 6.

To be honest, test_vizplots.py could almost be treated as an integration/behaviour-driven development test, but I haven't had time to do that properly. Also took the opportunity to update some documentation in deepicedrain/README.md, adding the icesat2dhdt catalog entry and the wide_to_long function that were missed out before.
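
Roughly speaking, the refactored function looks something like the sketch below, pieced together from the inline code this commit removes from atlxi_dhdt.ipynb/.py. This is a sketch only: the actual implementation lives in deepicedrain/vizplots.py, and the way the oldtonew flag is handled here (reversing the plotting order, since GMT stacks legend entries in plot order) is an assumption.

import numpy as np
import pandas as pd
import pygmt


def plot_alongtrack(
    df: pd.DataFrame, rgtpair: str, regionname: str, oldtonew: bool = True
) -> pygmt.Figure:
    """Sketch of a 2D along track plot of height measurements over cycles."""
    fig = pygmt.Figure()
    # Setup map frame, title, axis annotations, etc
    fig.basemap(
        projection="X30c/10c",
        region=[df.x_atc.min(), df.x_atc.max(), df.h_corr.min(), df.h_corr.max()],
        frame=[
            rf'WSne+t"ICESat-2 Change in Ice Surface Height over Time at {regionname}"',
            'xaf+l"Along track x (m)"',
            'yaf+l"Height (m)"',
        ],
    )
    fig.text(
        text=f"Reference Ground Track {rgtpair}", position="TC", offset="jTC0c/0.2c"
    )
    # Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=7
    cycle_colors: dict = {
        3: "#ff7f00", 4: "#984ea3", 5: "#4daf4a", 6: "#377eb8", 7: "#e41a1c"
    }
    # Assumed oldtonew handling: legend entries stack in plot order, so
    # reversing the cycle order flips the legend (e.g. puts Cycle 7 above
    # Cycle 6 when a lake is filling up over time)
    cycles: list = sorted(cycle_colors.keys(), reverse=not oldtonew)
    for cycle in cycles:
        df_ = df.query(expr="cycle_number == @cycle").copy()
        if len(df_) > 0:
            # Get x, y, and mean acquisition time for this cycle
            data = np.column_stack(tup=(df_.x_atc, df_.h_corr))
            time_sec = np.datetime_as_string(
                arr=df_.utc_time.mean().to_datetime64(), unit="s"
            )
            # Plot data points
            fig.plot(
                data=data,
                style="c0.05c",
                color=cycle_colors[cycle],
                label=f'"Cycle {cycle} at {time_sec}"',
            )
    fig.legend(S=3, position="JMR+JMR+o0.2c", box="+gwhite+p1p")
    return fig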
weiji14 committed Sep 15, 2020
1 parent ef939bf commit f5c586a
Showing 9 changed files with 280 additions and 86 deletions.
37 changes: 3 additions & 34 deletions atlxi_dhdt.ipynb
@@ -1009,40 +1009,9 @@
"metadata": {},
"outputs": [],
"source": [
"# Plot 2D along track view of\n",
"# Ice Surface Height Changes over Time\n",
"fig = pygmt.Figure()\n",
"# Setup map frame, title, axis annotations, etc\n",
"fig.basemap(\n",
" projection=\"X30c/10c\",\n",
" region=[df.x_atc.min(), df.x_atc.max(), df.h_corr.min(), df.h_corr.max()],\n",
" frame=[\n",
" rf'WSne+t\"ICESat-2 Change in Ice Surface Height over Time at {region.name}\"',\n",
" 'xaf+l\"Along track x (m)\"',\n",
" 'yaf+l\"Height (m)\"',\n",
" ],\n",
")\n",
"fig.text(\n",
" text=f\"Reference Ground Track {rgt:04d}\", position=\"TC\", offset=\"jTC0c/0.2c\", V=\"q\"\n",
")\n",
"# Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=7\n",
"cycle_colors = {3: \"#ff7f00\", 4: \"#984ea3\", 5: \"#4daf4a\", 6: \"#377eb8\", 7: \"#e41a1c\"}\n",
"for cycle, color in cycle_colors.items():\n",
" df_ = df.query(expr=\"cycle_number == @cycle\").copy()\n",
" if len(df_) > 0:\n",
" # Get x, y, time\n",
" data = np.column_stack(tup=(df_.x_atc, df_.h_corr))\n",
" time_nsec = df_.utc_time.mean()\n",
" time_sec = np.datetime_as_string(arr=time_nsec.to_datetime64(), unit=\"s\")\n",
" label = f'\"Cycle {cycle} at {time_sec}\"'\n",
"\n",
" # Plot data points\n",
" fig.plot(data=data, style=\"c0.05c\", color=color, label=label)\n",
" # Plot line connecting points\n",
" # fig.plot(data=data, pen=f\"faint,{color},-\", label=f'\"+g-1l+s0.15c\"')\n",
"\n",
"fig.legend(S=3, position=\"JMR+JMR+o0.2c\", box=\"+gwhite+p1p\")\n",
"fig.savefig(f\"figures/alongtrack_atl11_dh_{placename}_{rgt}.png\")\n",
"# Plot 2D along track view of Ice Surface Height Changes over Time\n",
"fig = deepicedrain.plot_alongtrack(df=df, rgtpair=f\"{rgt:04d}\", regionname=region.name)\n",
"fig.savefig(fname=f\"figures/alongtrack_{placename}_{rgt}.png\")\n",
"fig.show()"
]
},
37 changes: 3 additions & 34 deletions atlxi_dhdt.py
@@ -454,40 +454,9 @@
df = df.query(expr="abs(dhdt_slope) > 0.2 & h_corr < 300")

# %%
# Plot 2D along track view of
# Ice Surface Height Changes over Time
fig = pygmt.Figure()
# Setup map frame, title, axis annotations, etc
fig.basemap(
projection="X30c/10c",
region=[df.x_atc.min(), df.x_atc.max(), df.h_corr.min(), df.h_corr.max()],
frame=[
rf'WSne+t"ICESat-2 Change in Ice Surface Height over Time at {region.name}"',
'xaf+l"Along track x (m)"',
'yaf+l"Height (m)"',
],
)
fig.text(
text=f"Reference Ground Track {rgt:04d}", position="TC", offset="jTC0c/0.2c", V="q"
)
# Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=7
cycle_colors = {3: "#ff7f00", 4: "#984ea3", 5: "#4daf4a", 6: "#377eb8", 7: "#e41a1c"}
for cycle, color in cycle_colors.items():
df_ = df.query(expr="cycle_number == @cycle").copy()
if len(df_) > 0:
# Get x, y, time
data = np.column_stack(tup=(df_.x_atc, df_.h_corr))
time_nsec = df_.utc_time.mean()
time_sec = np.datetime_as_string(arr=time_nsec.to_datetime64(), unit="s")
label = f'"Cycle {cycle} at {time_sec}"'

# Plot data points
fig.plot(data=data, style="c0.05c", color=color, label=label)
# Plot line connecting points
# fig.plot(data=data, pen=f"faint,{color},-", label=f'"+g-1l+s0.15c"')

fig.legend(S=3, position="JMR+JMR+o0.2c", box="+gwhite+p1p")
fig.savefig(f"figures/alongtrack_atl11_dh_{placename}_{rgt}.png")
# Plot 2D along track view of Ice Surface Height Changes over Time
fig = deepicedrain.plot_alongtrack(df=df, rgtpair=f"{rgt:04d}", regionname=region.name)
fig.savefig(fname=f"figures/alongtrack_{placename}_{rgt}.png")
fig.show()

# %%
66 changes: 58 additions & 8 deletions atlxi_lake.ipynb
@@ -2,7 +2,9 @@
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"source": [
"# **ICESat-2 Active Subglacial Lakes in Antarctica**\n",
"\n",
@@ -25,26 +27,30 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import numpy as np\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n",
"\n",
"import cudf\n",
"import cuml\n",
"import dask\n",
"import dask.array\n",
"import deepicedrain\n",
"import geopandas as gpd\n",
"import hvplot.cudf\n",
"import numpy as np\n",
"import pandas as pd\n",
"import panel as pn\n",
"import pygmt\n",
"import scipy.spatial\n",
"import shapely.geometry\n",
"import tqdm\n",
"import zarr"
"import zarr\n",
"\n",
"import deepicedrain"
]
},
{
@@ -359,14 +365,58 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"# Subset data to lake of interest\n",
"placename: str = region.name.lower().replace(\" \", \"_\")\n",
"df_lake: cudf.DataFrame = region.subset(data=df_dhdt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select a few Reference Ground tracks to look at\n",
"rgts: list = [int(rgt) for rgt in lake.refgtracks.split(\"|\")]\n",
"print(f\"Looking at Reference Ground Tracks: {rgts}\")\n",
"os.makedirs(name=f\"figures/{placename}\", exist_ok=True)\n",
"\n",
"track_dict: dict = {}\n",
"rgt_groups = df_lake.groupby(by=\"referencegroundtrack\")\n",
"for rgt, df_rgt_wide in tqdm.tqdm(rgt_groups, total=len(rgt_groups.groups.keys())):\n",
" df_rgt: pd.DataFrame = deepicedrain.wide_to_long(\n",
" df=df_rgt_wide.to_pandas(),\n",
" stubnames=[\"h_corr\", \"utc_time\"],\n",
" j=\"cycle_number\",\n",
" )\n",
"\n",
" # Split one referencegroundtrack into 3 laser pair tracks pt1, pt2, pt3\n",
" df_rgt[\"pairtrack\"]: pd.Series = pd.cut(\n",
" x=df_rgt.y_atc, bins=[-np.inf, -100, 100, np.inf], labels=(\"pt1\", \"pt2\", \"pt3\")\n",
" )\n",
" pt_groups = df_rgt.groupby(by=\"pairtrack\")\n",
" for pairtrack, df_ in pt_groups:\n",
" if len(df_) > 0:\n",
" rgtpair = f\"{rgt:04d}_{pairtrack}\"\n",
" track_dict[rgtpair] = df_\n",
"\n",
" # Transect plot along a reference ground track\n",
" fig = deepicedrain.plot_alongtrack(\n",
" df=df_,\n",
" rgtpair=rgtpair,\n",
" regionname=region.name,\n",
" oldtonew=draining,\n",
" )\n",
" fig.savefig(\n",
" fname=f\"figures/{placename}/alongtrack_{placename}_{rgtpair}.png\"\n",
" )"
]
},
{
"cell_type": "markdown",
"metadata": {
@@ -477,7 +527,7 @@
"# Calculate crossover error\n",
"df[\"h_X\"]: pd.Series = df.h_2 - df.h_1 # crossover error (i.e. height difference)\n",
"df[\"t_D\"]: pd.Series = df.t_2 - df.t_1 # elapsed time in ns (i.e. time difference)\n",
"ns_in_yr: int = (365.25 * 24 * 60 * 60 * 1_000_000_000) # nanoseconds in a year\n",
"ns_in_yr: int = 365.25 * 24 * 60 * 60 * 1_000_000_000 # nanoseconds in a year\n",
"df[\"dhdt\"]: pd.Series = df.h_X / (df.t_D.astype(np.int64) / ns_in_yr)"
]
},
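
A quick sanity check on the crossover dhdt arithmetic in that last hunk: the time difference t_D is a timedelta in nanoseconds, so dividing its int64 representation by ns_in_yr converts it to years before dividing the height difference by it. A tiny worked example with made-up numbers:

import numpy as np
import pandas as pd

# A height difference of +0.5 m measured half a year apart should give ~1 m/yr
h_X = 0.5  # crossover height difference in metres
t_D = pd.Timedelta(days=182.625)  # half of 365.25 days, as elapsed time
ns_in_yr = 365.25 * 24 * 60 * 60 * 1_000_000_000  # nanoseconds in a year
dhdt = h_X / (t_D.to_timedelta64().astype(np.int64) / ns_in_yr)
print(dhdt)  # 1.0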
49 changes: 44 additions & 5 deletions atlxi_lake.py
@@ -32,25 +32,28 @@
# we will use state of the art GPU algorithms enabled by RAPIDS AI libraries,
# or parallelize the processing across our HPC's many CPU cores using Dask.


# %%
import os

import numpy as np
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import cudf
import cuml
import dask
import dask.array
import deepicedrain
import geopandas as gpd
import hvplot.cudf
import numpy as np
import pandas as pd
import panel as pn
import pygmt
import scipy.spatial
import shapely.geometry
import tqdm
import zarr

import deepicedrain

# %% [markdown]
# # Data Preparation

@@ -242,6 +245,7 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series:
f"ATLXI/df_dhdt_{placename.lower()}.parquet"
)


# %%
# Antarctic subglacial lake polygons with EPSG:3031 coordinates
antarctic_lakes: gpd.GeoDataFrame = gpd.read_file(
@@ -268,6 +272,42 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series:
df_lake: cudf.DataFrame = region.subset(data=df_dhdt)


# %%
# Select a few Reference Ground tracks to look at
rgts: list = [int(rgt) for rgt in lake.refgtracks.split("|")]
print(f"Looking at Reference Ground Tracks: {rgts}")
os.makedirs(name=f"figures/{placename}", exist_ok=True)

track_dict: dict = {}
rgt_groups = df_lake.groupby(by="referencegroundtrack")
for rgt, df_rgt_wide in tqdm.tqdm(rgt_groups, total=len(rgt_groups.groups.keys())):
df_rgt: pd.DataFrame = deepicedrain.wide_to_long(
df=df_rgt_wide.to_pandas(),
stubnames=["h_corr", "utc_time"],
j="cycle_number",
)

# Split one referencegroundtrack into 3 laser pair tracks pt1, pt2, pt3
df_rgt["pairtrack"]: pd.Series = pd.cut(
x=df_rgt.y_atc, bins=[-np.inf, -100, 100, np.inf], labels=("pt1", "pt2", "pt3")
)
pt_groups = df_rgt.groupby(by="pairtrack")
for pairtrack, df_ in pt_groups:
if len(df_) > 0:
rgtpair = f"{rgt:04d}_{pairtrack}"
track_dict[rgtpair] = df_

# Transect plot along a reference ground track
fig = deepicedrain.plot_alongtrack(
df=df_,
rgtpair=rgtpair,
regionname=region.name,
oldtonew=draining,
)
fig.savefig(
fname=f"figures/{placename}/alongtrack_{placename}_{rgtpair}.png"
)

# %% [markdown]
# # Crossover Track Analysis
#
@@ -348,7 +388,7 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series:
# Calculate crossover error
df["h_X"]: pd.Series = df.h_2 - df.h_1 # crossover error (i.e. height difference)
df["t_D"]: pd.Series = df.t_2 - df.t_1 # elapsed time in ns (i.e. time difference)
ns_in_yr: int = (365.25 * 24 * 60 * 60 * 1_000_000_000) # nanoseconds in a year
ns_in_yr: int = 365.25 * 24 * 60 * 60 * 1_000_000_000 # nanoseconds in a year
df["dhdt"]: pd.Series = df.h_X / (df.t_D.astype(np.int64) / ns_in_yr)

# %%
@@ -406,7 +446,6 @@ def find_clusters(X: cudf.core.dataframe.DataFrame) -> cudf.core.series.Series:
# %%
# Tidy up dataframe first using pd.wide_to_long
# I.e. convert 't_1', 't_2', 'h_1', 'h_2' columns into just 't' and 'h'.
df["id"] = df.index
df_th: pd.DataFrame = deepicedrain.wide_to_long(
df=df[["track1_track2", "x", "y", "t_1", "t_2", "h_1", "h_2"]],
stubnames=["t", "h"],
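
A note on the pd.cut call in the loop above: each ICESat-2 reference ground track carries three laser pair tracks spaced roughly 3.3 km apart across track, with y_atc centred near the middle pair, so cutting at +/-100 m cleanly separates pt1, pt2 and pt3. A tiny sketch with made-up y_atc values:

import numpy as np
import pandas as pd

# Hypothetical across-track coordinates: the outer pair tracks sit kilometres
# away from the middle one, so +/-100 m bins are a generous separator
y_atc = pd.Series([-3300.5, -12.7, 55.1, 3295.9])
pairtrack = pd.cut(
    x=y_atc, bins=[-np.inf, -100, 100, np.inf], labels=("pt1", "pt2", "pt3")
)
print(pairtrack.tolist())  # ['pt1', 'pt2', 'pt2', 'pt3']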
3 changes: 3 additions & 0 deletions deepicedrain/README.md
@@ -5,6 +5,7 @@ Contents:
- :artificial_satellite: atlas_catalog.yaml - [intake](https://intake.readthedocs.io) data catalog for accessing ICESat-2 ATLAS datasets
- icesat2atlasdownloader - Download Antarctic ICESat-2 ATLAS products from [NSIDC](https://nsidc.org/data/ICESat-2)
- icesat2atl06 - Reads in ICESat-2 ATL06 data into an xarray.Dataset
- icesat2dhdt - Preprocessed ICESat-2 height change over time (dhdt) data in a columnar format
- test_data - Sample ICESat-2 datasets for testing purposes

- :1234: deltamath.py - Mathematical functions for calculating delta changes of some physical unit
@@ -20,6 +21,8 @@ Contents:
- :card_file_box: extraload.py - Convenience functions for extracting, transforming and loading data
- array_to_dataframe - Turns a 1D/2D numpy/dask array into a tidy pandas/dask dataframe table
- ndarray_to_parquet - Turns an n-dimensional xarray/zarr array into a parquet columnar format
- wide_to_long - Turns a pandas dataframe table with many columns into one with many rows

- :world_map: vizplots.py - Makes interactive dashboard plots and publication quality figures
- IceSat2Explorer - Dashboard for interacting with ICESat-2 point clouds on a 2D map
- plot_alongtrack - Makes a 2D along track figure of height measurements taken at different cycle times
5 changes: 3 additions & 2 deletions deepicedrain/__init__.py
@@ -1,8 +1,9 @@
import importlib.resources
import logging

import deepicedrain
import intake

import deepicedrain
from deepicedrain.deltamath import calculate_delta, nan_linregress, nanptp
from deepicedrain.extraload import array_to_dataframe, ndarray_to_parquet, wide_to_long
from deepicedrain.spatiotemporal import (
@@ -11,7 +12,7 @@
lonlat_to_xy,
point_in_polygon_gpu,
)
from deepicedrain.vizplots import IceSat2Explorer
from deepicedrain.vizplots import IceSat2Explorer, plot_alongtrack

__version__: str = "0.2.1"

2 changes: 1 addition & 1 deletion deepicedrain/extraload.py
@@ -135,7 +135,7 @@ def wide_to_long(
index (the 'j' variable), while dropping NaN values too!
Documentation for input arguments are the same as pd.wide_to_long. This
convenience functions just uses different default arguments for 'i' and
convenience function just uses different default arguments for 'i' and
'sep'.
"""
df[i] = df.index
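
Since the wide_to_long docstring got a touch-up above, here's a usage sketch as well. The column names and values are made up, and it assumes the wrapper defaults to sep='_' (melting h_corr_X/utc_time_X column pairs into long rows), matching how it's called in atlxi_lake.py:

import pandas as pd

import deepicedrain

# Wide format: one 'h_corr_X'/'utc_time_X' column pair per ICESat-2 cycle
df_wide = pd.DataFrame(
    data={
        "x_atc": [100.0, 200.0],
        "h_corr_6": [1816.0, 1821.0],
        "h_corr_7": [1818.5, None],  # NaN rows get dropped in the melt
        "utc_time_6": pd.to_datetime(["2020-05-13", "2020-05-13"]),
        "utc_time_7": pd.to_datetime(["2020-08-11", "2020-08-11"]),
    }
)
# Long format: one row per (point, cycle_number), with plain 'h_corr' and
# 'utc_time' columns ready for deepicedrain.plot_alongtrack
df_long = deepicedrain.wide_to_long(
    df=df_wide, stubnames=["h_corr", "utc_time"], j="cycle_number"
)
print(df_long)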
