add pv get metadata

openclimatefix · Dec 12, 2023 · 9310070 · 9310070
1 parent d5bac61
commit 9310070
Show file tree

Hide file tree

Showing 2 changed files with 43 additions and 2 deletions.
diff --git a/quartz_solar_forecast/eval/pv.py b/quartz_solar_forecast/eval/pv.py
@@ -0,0 +1,39 @@
+import os
+
+import pandas as pd
+from huggingface_hub import HfFileSystem
+
+fs = HfFileSystem()
+
+
+def get_pv_metadata(testset: pd.DataFrame):  # returns testset left-joined with per-site metadata
+    # Attach site metadata (rounded latitude/longitude and kwp) to each testset row via "pv_id".
+    # download metadata.csv from Hugging Face on first use, otherwise reuse the cached copy
+    cache_dir = "data/pv"  # relative path, so the cache location depends on the working directory
+    metadata_file = f"{cache_dir}/metadata.csv"
+    if not os.path.exists(metadata_file):
+        os.makedirs(cache_dir, exist_ok=True)
+        fs.get("datasets/openclimatefix/uk_pv/metadata.csv", metadata_file)
+
+    # Load the metadata table from the cached CSV
+    metadata_df = pd.read_csv(metadata_file)
+
+    # the metadata keys sites by "ss_id"; rename it so it can be joined to testset on "pv_id"
+    metadata_df = metadata_df.rename(columns={"ss_id": "pv_id"})
+    combined_data = testset.merge(metadata_df, on="pv_id", how="left")  # left join keeps testset rows without metadata
+
+    # only keep the columns we need; "datetime" is not produced here - presumably a testset column (TODO confirm)
+    combined_data = combined_data[
+        ["pv_id", "datetime", "latitude_rounded", "longitude_rounded", "kwp"]
+    ]
+
+    # rename latitude_rounded -> latitude, longitude_rounded -> longitude and kwp -> capacity
+    combined_data = combined_data.rename(
+        columns={
+            "latitude_rounded": "latitude",
+            "longitude_rounded": "longitude",
+            "kwp": "capacity",
+        }
+    )
+
+    return combined_data
diff --git a/quartz_solar_forecast/evaluation.py b/quartz_solar_forecast/evaluation.py
@@ -9,15 +9,17 @@
 from quartz_solar_forecast.eval.nwp import get_nwp
 from quartz_solar_forecast.eval.forecast import run_forecast
 from quartz_solar_forecast.eval.utils import combine_forecast_ground_truth
+from quartz_solar_forecast.eval.pv import get_pv_metadata
 
 import pandas as pd
 
 
-def run_eval(testset_path):
+def run_eval(testset_path = 'quartz_solar_forecast/dataset/testset.csv'):
     # load testset from csv
     testset = pd.read_csv(testset_path)
 
     # Extract generation data and metadata for specific sites and timestamps for the testset from Hugging Face. (Zak)
+    pv_metadata=get_pv_metadata(testset)
 
     # Split data into PV inputs and ground truth. (Zak)
     ground_truth_df = None # TODO
@@ -27,7 +29,7 @@ def run_eval(testset_path):
 
     # Run forecast with PV and NWP inputs.
     # TODO update pv_df
-    predictions_df = run_forecast(pv_df=None, nwp_df=nwp_df)
+    predictions_df = run_forecast(pv_df=pv_metadata, nwp_df=nwp_df)
 
     # Combine the forecast results with the ground truth (ts, id, horizon (in hours), pred, truth, diff)
     results_df = combine_forecast_ground_truth(predictions_df, ground_truth_df)