Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add satellite #101

Merged
merged 40 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
d8b41a6
add test satellite
peterdudfield Dec 4, 2023
f37b7d3
fix test
peterdudfield Dec 4, 2023
2fdcd25
format
peterdudfield Dec 4, 2023
bebbf7d
tidy
peterdudfield Dec 4, 2023
42b3bb1
get something basic working, for loading satellite data
peterdudfield Dec 4, 2023
338fe1c
lint
peterdudfield Dec 4, 2023
9711fc8
add comments
peterdudfield Dec 4, 2023
ba61407
add more satellite things
peterdudfield Dec 5, 2023
2c10022
add flexible option for tranformer lat lon order
peterdudfield Dec 5, 2023
69cf565
isort
peterdudfield Dec 5, 2023
17260b0
lint
peterdudfield Dec 5, 2023
e4946dd
add pyresample to requirements
peterdudfield Dec 5, 2023
3fa60f7
set default to no satellite data sources
peterdudfield Dec 5, 2023
faddd77
lint
peterdudfield Dec 5, 2023
6755891
add satellite to recent history model
peterdudfield Dec 5, 2023
23889dc
lint
peterdudfield Dec 5, 2023
7131fcf
add satellite test data
peterdudfield Dec 5, 2023
20ad116
add dummy satellite into test
peterdudfield Dec 5, 2023
0ad737d
add v9 in tests
peterdudfield Dec 5, 2023
6770070
tidy up
peterdudfield Dec 5, 2023
acf7166
add tests on utils
peterdudfield Dec 5, 2023
8df4d9c
add average option when slicing data
peterdudfield Dec 5, 2023
7b177e8
add patch size option
peterdudfield Dec 5, 2023
afa60fd
move getting patch to recent history model
peterdudfield Dec 6, 2023
603a751
update to patch size of 0.5
peterdudfield Dec 6, 2023
ef1a977
add satellite to uk_pv config
peterdudfield Dec 6, 2023
9e3db85
remove print
peterdudfield Dec 7, 2023
c88c7e5
satellite default patch size is 0.5, satellite fix for None
peterdudfield Dec 8, 2023
1c05ba3
add satellite separate opening method
peterdudfield Dec 8, 2023
49b6cc1
add case when satellite data is None
peterdudfield Dec 8, 2023
868701a
add forecast horizon to features when using satellite
peterdudfield Dec 11, 2023
2ae8383
lint
peterdudfield Dec 11, 2023
4e6a58b
fix forecast horizon
peterdudfield Dec 11, 2023
86ec883
update sat
peterdudfield Dec 11, 2023
491b544
at other patches into hashing
peterdudfield Dec 11, 2023
1911421
add experiment results
peterdudfield Dec 19, 2023
625d95c
add to exp readme
peterdudfield Dec 19, 2023
ce915b1
PR comments
peterdudfield Dec 19, 2023
67fa5f5
PR comment, tidy up description
peterdudfield Dec 19, 2023
df43cdf
lint
peterdudfield Dec 19, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ This repo contains code to train and evaluate pv-site models.
├── data # Placeholder for data files
└── psp # Main python package
├── clients # Client specific code
├── data_sources # Data sources (PV, NWP, etc.)
├── data_sources # Data sources (PV, NWP, Satellite, etc.)
├── exp_configs # Experimentation configs - a config defines the different options for
│ # training and evaluation models. This directory contains many ready
│ # configs where the paths points to the data on Leonardo.
Expand Down
174 changes: 152 additions & 22 deletions poetry.lock

Large diffs are not rendered by default.

86 changes: 21 additions & 65 deletions psp/data_sources/nwp.py
Original file line number Diff line number Diff line change
@@ -1,72 +1,20 @@
import datetime as dt
import logging
import pathlib
import pickle
from typing import Optional, TypeVar
from typing import Optional

# This import registers a codec.
import ocf_blosc2 # noqa
import xarray as xr

from psp.data_sources.utils import _STEP, _TIME, _VALUE, _VARIABLE, _X, _Y, slice_on_lat_lon
peterdudfield marked this conversation as resolved.
Show resolved Hide resolved
from psp.gis import CoordinateTransformer
from psp.typings import Timestamp
from psp.utils.dates import to_pydatetime
from psp.utils.hashing import naive_hash

T = TypeVar("T", bound=xr.Dataset | xr.DataArray)

_X = "x"
_Y = "y"
_TIME = "time"
_STEP = "step"
_VARIABLE = "variable"
_VALUE = "value"


def _slice_on_lat_lon(
    data: T,
    *,
    min_lat: float | None = None,
    max_lat: float | None = None,
    min_lon: float | None = None,
    max_lon: float | None = None,
    nearest_lat: float | None = None,
    nearest_lon: float | None = None,
    transformer: CoordinateTransformer,
    x_is_ascending: bool,
    y_is_ascending: bool,
) -> T:
    """Slice `data` to a lat/lon bounding box, or select the single nearest point.

    The four box bounds (`min_lat`/`max_lat`/`min_lon`/`max_lon`) must be
    provided all together or not at all. When no box and no nearest point is
    given, `data` is returned unchanged.
    """
    box = [min_lat, max_lat, min_lon, max_lon]
    missing = sum(1 for value in box if value is None)
    # Either a complete bounding box or no box at all.
    assert missing in (0, 4)

    if min_lat is not None:
        # These narrow the optional types for the type checker.
        assert min_lon is not None
        assert max_lat is not None
        assert max_lon is not None

        assert max_lat >= min_lat
        assert max_lon >= min_lon

        (min_x, min_y), (max_x, max_y) = transformer(
            [(min_lat, min_lon), (max_lat, max_lon)]
        )

        # `slice` endpoints must follow the storage order of each coordinate.
        if not x_is_ascending:
            min_x, max_x = max_x, min_x
        if not y_is_ascending:
            min_y, max_y = max_y, min_y

        # Type ignore: simpler than writing `@overload`s for Dataset/DataArray.
        return data.sel(x=slice(min_x, max_x), y=slice(min_y, max_y))  # type: ignore

    if nearest_lat is not None and nearest_lon is not None:
        ((nearest_x, nearest_y),) = transformer([(nearest_lat, nearest_lon)])
        return data.sel(x=nearest_x, y=nearest_y, method="nearest")  # type: ignore

    return data
_log = logging.getLogger(__name__)


class NwpDataSource:
Expand All @@ -90,8 +38,9 @@ def __init__(
y_is_ascending: bool = True,
cache_dir: str | None = None,
lag_minutes: float = 0.0,
nwp_tolerance: Optional[str] = None,
nwp_variables: Optional[list[str]] = None,
tolerance: Optional[str] = None,
variables: Optional[list[str]] = None,
filter_on_step: Optional[bool] = True,
):
"""
Arguments:
Expand All @@ -115,6 +64,7 @@ def __init__(
tolerance: How old should the NWP predictions be before we start ignoring them.
See `NwpDataSource.get`'s documentation for details.
variables: Only use this subset of NWP variables. Defaults to using all.

"""
if isinstance(paths_or_data, str):
paths_or_data = [paths_or_data]
Expand All @@ -140,17 +90,21 @@ def __init__(

self._lag_minutes = lag_minutes

self._nwp_tolerance = nwp_tolerance
self._nwp_variables = nwp_variables
self._tolerance = tolerance
self._variables = variables

self._data = self._prepare_data(raw_data)
self.raw_data = raw_data

self._cache_dir = pathlib.Path(cache_dir) if cache_dir else None

if self._cache_dir:
self._cache_dir.mkdir(exist_ok=True)

self._filter_on_step = filter_on_step

def _open(self, paths: list[str]) -> xr.Dataset:
_log.debug(f"Opening data {paths}")
return xr.open_mfdataset(
paths,
engine="zarr",
Expand All @@ -176,8 +130,8 @@ def _prepare_data(self, data: xr.Dataset) -> xr.Dataset:
data = data.rename(rename_map)

# Filter data to keep only the variables in self._variables if it's not None
if self._nwp_variables is not None:
data = data.sel(variable=self._nwp_variables)
if self._variables is not None:
data = data.sel(variable=self._variables)

return data

Expand Down Expand Up @@ -260,6 +214,7 @@ def get(

# If it was not loaded from the cache, we load it from the original dataset.
if data is None:

data = self._get(
now=now,
timestamps=timestamps,
Expand Down Expand Up @@ -313,7 +268,7 @@ def _get(
assert tolerance is not None
return None

ds = _slice_on_lat_lon(
ds = slice_on_lat_lon(
peterdudfield marked this conversation as resolved.
Show resolved Hide resolved
ds,
min_lat=min_lat,
max_lat=max_lat,
Expand All @@ -331,8 +286,9 @@ def _get(
# How long after `time` do we need the predictions.
deltas = [t - init_time for t in timestamps]

# Get the nearest prediction to what we are interested in.
ds = ds.sel(step=deltas, method="nearest")
if self._filter_on_step:
# Get the nearest prediction to what we are interested in.
ds = ds.sel(step=deltas, method="nearest")

da = ds[_VALUE]

Expand Down
63 changes: 63 additions & 0 deletions psp/data_sources/satellite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import pyresample
import xarray as xr

from psp.data_sources.nwp import NwpDataSource
from psp.data_sources.utils import _TIME, _VALUE, _VARIABLE, _X, _Y
from psp.gis import CoordinateTransformer


class SatelliteDataSource(NwpDataSource):
    """Data source for satellite imagery stored as zarr.

    Reuses the `NwpDataSource` machinery, but pins the satellite dataset's
    dimension names (`x_geostationary`/`y_geostationary`, value variable
    `"data"`) and disables the NWP-specific "step" filtering.
    """

    def __init__(self, *args, **kwargs):
        """Forward everything to `NwpDataSource.__init__` with the satellite-specific options."""
        super().__init__(
            *args,
            **kwargs,
            filter_on_step=False,
            x_dim_name="x_geostationary",
            y_dim_name="y_geostationary",
            value_name="data",
        )

        # The dataset embeds its own geostationary area definition as a yaml
        # string in the `area` attribute; parse it to recover the CRS.
        area_definition_yaml = self._data.value.attrs["area"]
        geostationary_area_definition = pyresample.area_config.load_area_from_string(
            area_definition_yaml
        )
        geostationary_crs = geostationary_area_definition.crs

        # Coordinate transformer from lat/lon (EPSG:4326) to the geostationary CRS.
        self._coordinate_transformer = CoordinateTransformer(from_=4326, to=geostationary_crs)

    def prepare_data(self, data: xr.Dataset) -> xr.Dataset:
        """Rename the raw dimensions to the canonical internal names and filter variables.

        NOTE(review): the base class calls `self._prepare_data(...)` (with a
        leading underscore), so this method does not override it and appears
        unused — confirm whether it should be named `_prepare_data`.
        """
        # Rename the dimensions to the canonical `_X`/`_Y`/`_TIME`/... names.
        rename_map: dict[str, str] = {}
        for old, new in zip(
            [
                self._x_dim_name,
                self._y_dim_name,
                self._time_dim_name,
                self._variable_dim_name,
                self._value_name,
            ],
            [_X, _Y, _TIME, _VARIABLE, _VALUE],
        ):
            if old != new:
                rename_map[old] = new

        data = data.rename(rename_map)

        # Filter data to keep only the variables in `self._variables` if it's not None.
        if self._variables is not None:
            data = data.sel(variable=self._variables)

        return data

    def _open(self, paths: list[str]) -> xr.Dataset:
        """Open the satellite zarr files, concatenated along `time`.

        `join="override"` assumes the non-concatenated coordinates match
        across files and keeps the first file's values.
        """
        d = xr.open_mfdataset(
            paths,
            engine="zarr",
            concat_dim="time",
            combine="nested",
            chunks="auto",
            join="override",
        )
        return d
86 changes: 86 additions & 0 deletions psp/data_sources/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from typing import TypeVar

import xarray as xr

from psp.gis import CoordinateTransformer

# Canonical internal dimension/variable names that the data sources are
# normalized to (raw datasets are renamed onto these).
_X = "x"
_Y = "y"
_TIME = "time"
_STEP = "step"
_VARIABLE = "variable"
_VALUE = "value"

# Generic type for helpers that accept either a Dataset or a DataArray.
T = TypeVar("T", bound=xr.Dataset | xr.DataArray)


def slice_on_lat_lon(
    data: T,
    *,
    min_lat: float | None = None,
    max_lat: float | None = None,
    min_lon: float | None = None,
    max_lon: float | None = None,
    nearest_lat: float | None = None,
    nearest_lon: float | None = None,
    transformer: CoordinateTransformer,
    x_is_ascending: bool,
    y_is_ascending: bool,
) -> T:
    """
    Slice the data on lat/lon.

    Either pass all four of `min_lat`/`max_lat`/`min_lon`/`max_lon` to slice to
    a bounding box, or `nearest_lat`/`nearest_lon` to select the single nearest
    point, or none of them to leave the data untouched.

    Args:
    ----
        data: The data to slice
        min_lat: The minimum latitude to slice on
        max_lat: The maximum latitude to slice on
        min_lon: The minimum longitude to slice on
        max_lon: The maximum longitude to slice on
        nearest_lat: The latitude to select the nearest point to
        nearest_lon: The longitude to select the nearest point to
        transformer: The transformer to use to convert lat/lon to x/y
        x_is_ascending: Whether the x values are ascending
        y_is_ascending: Whether the y values are ascending

    Returns:
    -------
        The sliced data (same type as `data`).
    """
    # Only allow `None` values for lat/lon if they are all None (in which case we don't filter
    # by lat/lon).
    num_none = sum(x is None for x in [min_lat, max_lat, min_lon, max_lon])
    assert num_none in [0, 4]

    if min_lat is not None:
        # These asserts narrow the optional types for the type checker.
        assert min_lon is not None
        assert max_lat is not None
        assert max_lon is not None

        assert max_lat >= min_lat
        assert max_lon >= min_lon

        # Note on coordinate order: going from lat/lon to OSGB the transformer
        # takes `(lat, lon)` pairs, while going to geostationary it takes
        # `(lon, lat)` — the `CoordinateTransformer` is expected to handle the
        # ordering for the CRS it was built with.
        points = [(min_lat, min_lon), (max_lat, max_lon)]
        point1, point2 = transformer(points)
        min_x, min_y = point1
        max_x, max_y = point2

        # `slice` endpoints must follow the storage order of each axis.
        if not x_is_ascending:
            min_x, max_x = max_x, min_x
        if not y_is_ascending:
            min_y, max_y = max_y, min_y

        new_data = data.sel(x=slice(min_x, max_x), y=slice(min_y, max_y))

        # Type ignore because this is still simpler than adding some `@overload`.
        return new_data  # type: ignore

    elif nearest_lat is not None and nearest_lon is not None:
        ((x, y),) = transformer([(nearest_lat, nearest_lon)])

        return data.sel(x=x, y=y, method="nearest")  # type: ignore

    return data
4 changes: 2 additions & 2 deletions psp/exp_configs/island.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def get_data_source_kwargs(self):
x_dim_name="latitude",
y_dim_name="longitude",
x_is_ascending=False,
nwp_tolerance=None,
tolerance=None,
peterdudfield marked this conversation as resolved.
Show resolved Hide resolved
),
"EXC": NwpDataSource(
EXC_PATH,
Expand All @@ -62,7 +62,7 @@ def get_data_source_kwargs(self):
x_is_ascending=True,
y_is_ascending=True,
lag_minutes=8 * 60,
nwp_tolerance=None,
tolerance=None,
),
},
)
Expand Down
6 changes: 3 additions & 3 deletions psp/exp_configs/mone.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def get_data_source_kwargs(self):
value_name="UKV",
y_is_ascending=False,
# Those are the variables available in our prod environment.
nwp_variables=[
variables=[
"si10",
"vis",
# "r2",
Expand All @@ -64,7 +64,7 @@ def get_data_source_kwargs(self):
"mcc",
"lcc",
],
nwp_tolerance="168h",
tolerance="168h",
),
"EXC": NwpDataSource(
EXC_PATH,
Expand All @@ -75,7 +75,7 @@ def get_data_source_kwargs(self):
x_is_ascending=True,
y_is_ascending=True,
lag_minutes=8 * 60,
nwp_tolerance=None,
tolerance=None,
),
},
)
Expand Down
Loading