push, first try on getting nwp data
peterdudfield committed Dec 12, 2023
1 parent 0a401e4 commit 1d5052c
Showing 3 changed files with 158 additions and 17 deletions.
142 changes: 142 additions & 0 deletions quartz_solar_forecast/eval/nwp.py
@@ -0,0 +1,142 @@
""" Get nwp data from HF"""
import pandas as pd

import ocf_blosc2 # noqa
import xarray as xr
from huggingface_hub import HfFileSystem


def get_nwp(time_locations: pd.DataFrame):
    """
    Get all the NWP data for the given time locations

    time_locations should have the following columns:
    - timestamp
    - latitude
    - longitude
    - pv_id
    """

    all_nwp_dfs = []
    for i, row in time_locations.iterrows():
        print(f'{i} of {len(time_locations)}')
        one_nwp_df = get_nwp_for_one_timestamp_one_location(row['timestamp'], row['latitude'], row['longitude'])

        one_nwp_df['timestamp'] = row['timestamp']
        one_nwp_df['pv_id'] = row['pv_id']
        one_nwp_df['latitude'] = row['latitude']
        one_nwp_df['longitude'] = row['longitude']

        all_nwp_dfs.append(one_nwp_df)

    all_nwp_df = pd.concat(all_nwp_dfs)

    return all_nwp_df


def get_nwp_for_one_timestamp_one_location(
    timestamp: pd.Timestamp, latitude, longitude
):
    """
    Get NWP data from Hugging Face for one timestamp and one location

    :param timestamp: the timestamp you want the forecast for
    :param latitude: the latitude of the location you want the forecast for
    :param longitude: the longitude of the location you want the forecast for
    :return: NWP forecast as a pandas DataFrame
    """

    # TODO add caching

    fs = HfFileSystem()
    # List which files are available. Not all dates and model run times are available.
    # print(fs.ls("datasets/openclimatefix/dwd-icon-eu/data/2022/4/11/", detail=False))

    # floor the timestamp to the nearest 6 hours (the model run times)
    timestamp = timestamp.floor("6H")
    year = timestamp.year
    month = timestamp.month
    day = timestamp.day
    date_and_hour = timestamp.strftime("%Y%m%d_%H")

    date = f"{year}/{month}/{day}"
    file_location = f"{date}/{date_and_hour}.zarr.zip"
    huggingface_route = "zip:///::hf://datasets/openclimatefix/dwd-icon-eu/data"
    # huggingface_route = "datasets/openclimatefix/dwd-icon-eu/data"
    huggingface_file = f"{huggingface_route}/{file_location}"

    # TODO add cache so we only need to download this file once
    # see if this file exists in the cache
    # cache_dir = 'data/nwp'
    # cache_file = f"{cache_dir}/{file_location}"
    # if not os.path.exists(cache_file):
    #     # use fsspec to copy file
    #     print('copying file from HF to local')
    #     os.makedirs(f'{cache_dir}/{date}', exist_ok=True)
    #     fs.get(f"{huggingface_route}/{file_location}", f"{cache_file}")

    data = xr.open_zarr(
        f"{huggingface_file}",
        chunks="auto",
    )

    # take the nearest location
    data_at_location = data.sel(latitude=latitude, longitude=longitude, method="nearest")

    # select the following variables
    # "visibility": "vis",
    # "windspeed_10m": "si10", from u and v
    # "temperature_2m": "t_2m",
    # "precipitation": "tot_prec",
    # "shortwave_radiation": "aswdifd_s",
    # "direct_radiation": "aswdir_s",
    # "cloudcover_low": "clcl",
    # "cloudcover_mid": "clcm",
    # "cloudcover_high": "clch",
    variables = ["t_2m", "tot_prec", "clch", "clcm", "clcl", "u", "v", "aswdir_s", "aswdifd_s"]
    data_at_location = data_at_location[variables]

    # select a single pressure level (the last isobaricInhPa index)
    data_at_location = data_at_location.isel(isobaricInhPa=-1)

    # reduce to 54 hourly time steps, so there is at least a 48-hour forecast
    data_at_location = data_at_location.isel(step=slice(0, 54))

    # make times from the init time + steps
    times = pd.to_datetime(data_at_location.time.values) + pd.to_timedelta(
        data_at_location.step.values, unit="h"
    )

    # load all the data, this can take about a minute
    print(f"Loading dataset for {timestamp=} {longitude=} {latitude=}")
    data_at_location.load()

    # convert to pandas dataframe
    df = pd.DataFrame(times, columns=["timestamp"])
    for variable in variables:
        print(variable)
        df[variable] = data_at_location[variable].values

    # make wind speed out of u and v
    df["windspeed_10m"] = (df["u"] ** 2 + df["v"] ** 2) ** 0.5

    # rename variables
    df = df.rename(
        columns={
            "t_2m": "temperature_2m",
            "tot_prec": "precipitation",
            "aswdifd_s": "shortwave_radiation",
            "aswdir_s": "direct_radiation",
            "clcl": "cloudcover_low",
            "clcm": "cloudcover_mid",
            "clch": "cloudcover_high",
        }
    )

    # add a constant visibility for the moment
    # TODO
    df["visibility"] = 10000

    # drop u and v
    df = df.drop(columns=["u", "v"])

    return df
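As a rough usage sketch (illustrative only, not part of this commit): build a small time_locations frame with the columns get_nwp expects and pass it in. The pv_id values below are placeholders, and only dates with a model run published on Hugging Face will resolve (2022/4/11 is one of the dates listed in the commented-out fs.ls call above).

import pandas as pd

from quartz_solar_forecast.eval.nwp import get_nwp

# hypothetical samples; pv_id values are made up for illustration
time_locations = pd.DataFrame(
    {
        "timestamp": [pd.Timestamp("2022-04-11 06:00"), pd.Timestamp("2022-04-11 12:00")],
        "latitude": [51.5, 52.1],
        "longitude": [-0.1, 0.5],
        "pv_id": [1, 2],
    }
)

nwp_df = get_nwp(time_locations)
print(nwp_df.head())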
30 changes: 14 additions & 16 deletions quartz_solar_forecast/evaluation.py
@@ -5,30 +5,28 @@
This contains 50 sites each with 50 timestamps to make 2500 samples in total.
"""
from quartz_solar_forecast.eval.nwp import get_nwp

import pandas as pd


def run_eval(testset_path):
    # load testset from csv
    testset = pd.read_csv(testset_path)

    # Extract generation data and metadata for specific sites and timestamps for the testset from Hugging Face. (Zak)

    # Split data into PV inputs and ground truth. (Zak)

    # Collect NWP data from Hugging Face, ICON. (Peter)
    nwp_df = get_nwp(testset)

    # Run forecast with PV and NWP inputs.

    # Combine the forecast results with the ground truth (ts, id, horizon (in hours), pred, truth, diff)

    # Save file

    # Calculate and print metrics: MAE

    # Visualisations
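The metrics step above is still a stub. A minimal sketch of the MAE calculation, assuming the combined results DataFrame has the pred and truth columns described in the comment above, might look like:

def print_metrics(results_df: pd.DataFrame) -> None:
    # mean absolute error between forecast and ground truth
    mae = (results_df["pred"] - results_df["truth"]).abs().mean()
    print(f"MAE: {mae}")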
3 changes: 2 additions & 1 deletion requirements.txt
@@ -1,4 +1,5 @@
pandas
xarray
pv-site-prediction
pydantic
huggingface_hub # only for evaluation
