diff --git a/README.md b/README.md index a1068b96..53698921 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ This repo contains code to train and evaluate pv-site models. ├── data # Placeholder for data files └── psp # Main python package ├── clients # Client specific code - ├── data_sources # Data sources (PV, NWP, etc.) + ├── data_sources # Data sources (PV, NWP, Satellite, etc.) ├── exp_configs # Experimentation configs - a config defines the different options for │ # training and evaluation models. This directory contains many ready │ # configs where the paths points to the data on Leonardo. diff --git a/exp_reports/013_satellite/readme.md b/exp_reports/013_satellite/readme.md new file mode 100644 index 00000000..a240cb5c --- /dev/null +++ b/exp_reports/013_satellite/readme.md @@ -0,0 +1,38 @@ +## Backtest using Satellite + +We've added satellite data into our model. +We use the satellite data from the Google public datasets [here](gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/) + +The backtest was trained on 2018 and 2019 data and tested on 2020 and 2021 data. + +For satellite data we use all the channels, and take an average over the 5 by 5 pixels closest to the PV system. + +```bash +poetry run python psp/scripts/train_model.py -n uk_pv -c uk_pv -b 1 +``` + +If we don't include live PV data, and just forecast up to 8 hours ahead, +we get the following results: +The 0 hour improves from 17.0 to 15.6. This is a 9% improvement. +By 8 hours we see very little improvement. +This was run with 20,000 training samples. + + +If we include live PV data, and just forecast up to 8 hours ahead, +we get the following results: +The 0 hour improves from 12.9 to 11.6. This is a 10% improvement. +By 8 hours we see very little improvement. 
+ +![results](results.png) + +| config name | NWP | Live PV | Satellite | +|-------------|-----|---------|-----------| +| uk_pv_sat_no_pv | x | | x | +| uk_pv_no_pv | x | | | +| uk_pv_sat | x | x | x | +| uk_pv | x | x | | + + +We don't believe Satellite data adds any value to model after ~2 hours when we include live PV data. + + diff --git a/exp_reports/013_satellite/results.png b/exp_reports/013_satellite/results.png new file mode 100644 index 00000000..cf58b1f0 Binary files /dev/null and b/exp_reports/013_satellite/results.png differ diff --git a/poetry.lock b/poetry.lock index c152b195..2cea34fc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiofiles" @@ -793,6 +793,20 @@ lint = ["black (>=22.6.0)", "mdformat (>0.7)", "mdformat-gfm (>=0.3.5)", "ruff ( test = ["pytest"] typing = ["mypy (>=0.990)"] +[[package]] +name = "configobj" +version = "5.0.8" +description = "Config file reading, writing and validation." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "configobj-5.0.8-py2.py3-none-any.whl", hash = "sha256:a7a8c6ab7daade85c3f329931a807c8aee750a2494363934f8ea84d8a54c87ea"}, + {file = "configobj-5.0.8.tar.gz", hash = "sha256:6f704434a07dc4f4dc7c9a745172c1cad449feb548febd9f7fe362629c627a97"}, +] + +[package.dependencies] +six = "*" + [[package]] name = "contourpy" version = "1.1.1" @@ -2701,16 +2715,6 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = 
"sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -4052,6 +4056,47 @@ files = [ [package.extras] plugins = ["importlib-metadata"] +[[package]] +name = "pykdtree" +version = "1.3.10" +description = "Fast kd-tree implementation with OpenMP-enabled queries" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pykdtree-1.3.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:70a937b9c7b6ec61a9d7cc26794372384cf23961f942a1544493683942492a1e"}, + {file = "pykdtree-1.3.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:101d4b29eda1b9beaf67855af13eca47a65f02b5a6717b88d19d4c7cfc6a6729"}, + {file = "pykdtree-1.3.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a3272fae4f12a4dad799ad394d7749f3a505f722122fa0fffe31e56aea89b1d"}, + {file = "pykdtree-1.3.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbaf640cbcba97fb462da814ec1d317b46a1636451967b84d54c93196fba46d8"}, + {file = "pykdtree-1.3.10-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:0b347027d1548f6ad18ad370cbc30b4d94efaab77bee155e413cb230946a50ca"}, + {file = "pykdtree-1.3.10-cp310-cp310-win_amd64.whl", hash = "sha256:8a7d2ff07fa0716d718b64ff9c50abce398450b09b4eceac3c4e83dbd8318f38"}, + {file = "pykdtree-1.3.10-cp310-cp310-win_arm64.whl", hash = "sha256:7a2ca034888222abc5d3f67668d27e5389b082f1706a17e9a6dd37e1d2839eb6"}, + {file = "pykdtree-1.3.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:069543be5d501c40cf861c17dda386f1481932733b1764320d65ce50179f16df"}, + {file = "pykdtree-1.3.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:95817dd3fcff3a5de36286ba33eefcd47fbf46c693c37bea42c4aa06ea3ecdf6"}, + {file = "pykdtree-1.3.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e04add648af5bca296b40ad60204cc6db963c6edbf90f1971655494545976d1"}, + {file = "pykdtree-1.3.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed1fdf7a1b47677cb891396f18874d47d73c9b04e82a149a247548ec3251151c"}, + {file = "pykdtree-1.3.10-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:864f4fd28a45dc52f6c049f90f4a8251ae2f1aa3600cf66332eacea7798f3c7c"}, + {file = "pykdtree-1.3.10-cp311-cp311-win_amd64.whl", hash = "sha256:c9846c871c4e92d90ba970ebdbd93665af3fe287d4a93ec322297b17981cf0a2"}, + {file = "pykdtree-1.3.10-cp311-cp311-win_arm64.whl", hash = "sha256:f3f48dc8ee24f6aa30ba1e4169489b82edc6a2a1ef2d1dc2efba754a2d016063"}, + {file = "pykdtree-1.3.10-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0878f0134a008e0e40c8527fc96fbbc4b3362f193173dbef6a8ab277ebcc82f7"}, + {file = "pykdtree-1.3.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:397b6849223765348ba8b1a8a8c9786dd42c4958b82010b25d923fb8a6af5c35"}, + {file = "pykdtree-1.3.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a24308f6f6db8c4aff434b7b05b279c8711041c57da0448132e84c39bb8184ff"}, + {file = "pykdtree-1.3.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:3fd2b4a5cde45cc47dd26868317db9e4705fb9ad9c4c944a3fb7eb7a110f6dce"}, + {file = "pykdtree-1.3.10-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:25769e2a064e0b5f5a80a3885e62e9f727e681b96fbccf4497cc31f716f47579"}, + {file = "pykdtree-1.3.10-cp312-cp312-win_amd64.whl", hash = "sha256:f66fe62cc199d474dd869024300cdc75600d865d79ef90eda5d4798e209f554e"}, + {file = "pykdtree-1.3.10-cp312-cp312-win_arm64.whl", hash = "sha256:870b82b73728d5ed1951fd0d59701d3fa3fdaf3838c08a51084e5c231b578b85"}, + {file = "pykdtree-1.3.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b8e053d18949a2ba1ff8160e36aab4e0df2fbfd9a284b8d5ba7da62b2dd260c"}, + {file = "pykdtree-1.3.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:235a06403a66e347b5c873499a03c3f160b109ebf0abbcb7cc953ee85e2e8e2e"}, + {file = "pykdtree-1.3.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:491770d5305d7d6350c116c89bdcdd26eac02e5ae23d94cb159bf83a0b1b838b"}, + {file = "pykdtree-1.3.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39d957e9f99b68c69458a36e3e173d6b025d93c4ec2bf8715d51e06348f0dacf"}, + {file = "pykdtree-1.3.10-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3070e401408028fb4601362d8cc71f322529cc78f728b435ecef771457e5743e"}, + {file = "pykdtree-1.3.10-cp39-cp39-win_amd64.whl", hash = "sha256:692a1f708eca7c0b6afd8f438f445f6af486db6f822fd488d5ea08c9ee3ae493"}, + {file = "pykdtree-1.3.10-cp39-cp39-win_arm64.whl", hash = "sha256:a00c9bf543dc5205f2d55121ad9fa10209a46f4a8e0207bbb6ce2fe06ef3ef46"}, + {file = "pykdtree-1.3.10.tar.gz", hash = "sha256:41e7c5d669cadc2188acc4bbb4b0b4dcaf492d84512f1e6517a7ab2d122c911d"}, +] + +[package.dependencies] +numpy = "*" + [[package]] name = "pylint" version = "2.17.7" @@ -4151,6 +4196,44 @@ files = [ [package.dependencies] certifi = "*" +[[package]] +name = "pyresample" +version = "1.27.1" +description = "Geospatial image resampling in Python" +optional = false +python-versions = ">=3.9" +files = [ + 
{file = "pyresample-1.27.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:05e0591c524fc8723633014e901e3341c0867b32531e71ec9b7d849c91f80fca"}, + {file = "pyresample-1.27.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52dbbb659808303071740e787b9c679a031d062b821f61be8432224894f5a8ee"}, + {file = "pyresample-1.27.1-cp310-cp310-win_amd64.whl", hash = "sha256:c23aa7e206c18034f75c14daec815f42c8fd209f46d7db8e859cafb2372ef716"}, + {file = "pyresample-1.27.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d29f7a00c46bacd332f1b024ad9f6f5ced47a28da7c590bd34c7ee45b8edfa08"}, + {file = "pyresample-1.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f91dab2f846ac822b44accac852c5428691db065dbd17511a1ebc79e53a7c94c"}, + {file = "pyresample-1.27.1-cp311-cp311-win_amd64.whl", hash = "sha256:7f56dc2973f1a6beeea2b2d54a01e1b7c34a4933d73375e3328d3a7da29a899d"}, + {file = "pyresample-1.27.1-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:8b76b8096dec9a551ec7208dbcadff5b1b59fedc909df0305e1a02e250f7c390"}, + {file = "pyresample-1.27.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f4c7446f5bf4624a14fd0ac225d79ea8206d14089b29854ef5b6efb9331d578a"}, + {file = "pyresample-1.27.1-cp39-cp39-win_amd64.whl", hash = "sha256:73eb3c176070430b5497882d931a43dab85ecd7e931b1c5a8bb41d03977d6222"}, + {file = "pyresample-1.27.1.tar.gz", hash = "sha256:54ea5ac4a6f48cb9af57d891e36488e4bcf09c17c19f67890bf391eb3f0637df"}, +] + +[package.dependencies] +configobj = "*" +numpy = ">=1.10.0" +pykdtree = ">=1.3.1" +pyproj = ">=3.0" +pyyaml = "*" +setuptools = ">=3.2" +shapely = "*" + +[package.extras] +cf = ["xarray"] +dask = ["dask (>=0.16.1)"] +gradient-search = ["shapely"] +numexpr = ["numexpr"] +quicklook = ["cartopy (>=0.20.0)", "matplotlib", "pillow"] +rasterio = ["rasterio"] +tests = ["cartopy (>=0.20.0)", "dask", "matplotlib", "pillow", "pytest-lazy-fixtures", "rasterio", "scipy", "shapely", "xarray", 
"zarr"] +xarray-bilinear = ["dask", "xarray", "zarr"] + [[package]] name = "pytest" version = "7.4.3" @@ -4279,7 +4362,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -4287,15 +4369,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = 
"PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -4312,7 +4387,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = 
"PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -4320,7 +4394,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -4910,6 +4983,63 @@ others = ["lime"] plots = ["ipython", "matplotlib"] test = ["catboost", "lightgbm", "opencv-python", "pyod", "pyspark", "pytest", "pytest-cov", "pytest-mpl", "sentencepiece", "torch", "transformers", "xgboost"] +[[package]] +name = "shapely" +version = "2.0.2" +description = "Manipulation and analysis of geometric objects" +optional = false +python-versions = ">=3.7" +files = [ + {file = 
"shapely-2.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6ca8cffbe84ddde8f52b297b53f8e0687bd31141abb2c373fd8a9f032df415d6"}, + {file = "shapely-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:baa14fc27771e180c06b499a0a7ba697c7988c7b2b6cba9a929a19a4d2762de3"}, + {file = "shapely-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:36480e32c434d168cdf2f5e9862c84aaf4d714a43a8465ae3ce8ff327f0affb7"}, + {file = "shapely-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ef753200cbffd4f652efb2c528c5474e5a14341a473994d90ad0606522a46a2"}, + {file = "shapely-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9a41ff4323fc9d6257759c26eb1cf3a61ebc7e611e024e6091f42977303fd3a"}, + {file = "shapely-2.0.2-cp310-cp310-win32.whl", hash = "sha256:72b5997272ae8c25f0fd5b3b967b3237e87fab7978b8d6cd5fa748770f0c5d68"}, + {file = "shapely-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:34eac2337cbd67650248761b140d2535855d21b969d76d76123317882d3a0c1a"}, + {file = "shapely-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5b0c052709c8a257c93b0d4943b0b7a3035f87e2d6a8ac9407b6a992d206422f"}, + {file = "shapely-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2d217e56ae067e87b4e1731d0dc62eebe887ced729ba5c2d4590e9e3e9fdbd88"}, + {file = "shapely-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94ac128ae2ab4edd0bffcd4e566411ea7bdc738aeaf92c32a8a836abad725f9f"}, + {file = "shapely-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa3ee28f5e63a130ec5af4dc3c4cb9c21c5788bb13c15e89190d163b14f9fb89"}, + {file = "shapely-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:737dba15011e5a9b54a8302f1748b62daa207c9bc06f820cd0ad32a041f1c6f2"}, + {file = "shapely-2.0.2-cp311-cp311-win32.whl", hash = "sha256:45ac6906cff0765455a7b49c1670af6e230c419507c13e2f75db638c8fc6f3bd"}, + {file = "shapely-2.0.2-cp311-cp311-win_amd64.whl", 
hash = "sha256:dc9342fc82e374130db86a955c3c4525bfbf315a248af8277a913f30911bed9e"}, + {file = "shapely-2.0.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:06f193091a7c6112fc08dfd195a1e3846a64306f890b151fa8c63b3e3624202c"}, + {file = "shapely-2.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:eebe544df5c018134f3c23b6515877f7e4cd72851f88a8d0c18464f414d141a2"}, + {file = "shapely-2.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7e92e7c255f89f5cdf777690313311f422aa8ada9a3205b187113274e0135cd8"}, + {file = "shapely-2.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be46d5509b9251dd9087768eaf35a71360de6afac82ce87c636990a0871aa18b"}, + {file = "shapely-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5533a925d8e211d07636ffc2fdd9a7f9f13d54686d00577eeb11d16f00be9c4"}, + {file = "shapely-2.0.2-cp312-cp312-win32.whl", hash = "sha256:084b023dae8ad3d5b98acee9d3bf098fdf688eb0bb9b1401e8b075f6a627b611"}, + {file = "shapely-2.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:ea84d1cdbcf31e619d672b53c4532f06253894185ee7acb8ceb78f5f33cbe033"}, + {file = "shapely-2.0.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ed1e99702125e7baccf401830a3b94d810d5c70b329b765fe93451fe14cf565b"}, + {file = "shapely-2.0.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7d897e6bdc6bc64f7f65155dbbb30e49acaabbd0d9266b9b4041f87d6e52b3a"}, + {file = "shapely-2.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0521d76d1e8af01e712db71da9096b484f081e539d4f4a8c97342e7971d5e1b4"}, + {file = "shapely-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:5324be299d4c533ecfcfd43424dfd12f9428fd6f12cda38a4316da001d6ef0ea"}, + {file = "shapely-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:78128357a0cee573257a0c2c388d4b7bf13cb7dbe5b3fe5d26d45ebbe2a39e25"}, + {file = "shapely-2.0.2-cp38-cp38-macosx_10_9_universal2.whl", hash = 
"sha256:87dc2be34ac3a3a4a319b963c507ac06682978a5e6c93d71917618b14f13066e"}, + {file = "shapely-2.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:42997ac806e4583dad51c80a32d38570fd9a3d4778f5e2c98f9090aa7db0fe91"}, + {file = "shapely-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ccfd5fa10a37e67dbafc601c1ddbcbbfef70d34c3f6b0efc866ddbdb55893a6c"}, + {file = "shapely-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7c95d3379ae3abb74058938a9fcbc478c6b2e28d20dace38f8b5c587dde90aa"}, + {file = "shapely-2.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a21353d28209fb0d8cc083e08ca53c52666e0d8a1f9bbe23b6063967d89ed24"}, + {file = "shapely-2.0.2-cp38-cp38-win32.whl", hash = "sha256:03e63a99dfe6bd3beb8d5f41ec2086585bb969991d603f9aeac335ad396a06d4"}, + {file = "shapely-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:c6fd29fbd9cd76350bd5cc14c49de394a31770aed02d74203e23b928f3d2f1aa"}, + {file = "shapely-2.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1f217d28ecb48e593beae20a0082a95bd9898d82d14b8fcb497edf6bff9a44d7"}, + {file = "shapely-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:394e5085b49334fd5b94fa89c086edfb39c3ecab7f669e8b2a4298b9d523b3a5"}, + {file = "shapely-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fd3ad17b64466a033848c26cb5b509625c87d07dcf39a1541461cacdb8f7e91c"}, + {file = "shapely-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d41a116fcad58048d7143ddb01285e1a8780df6dc1f56c3b1e1b7f12ed296651"}, + {file = "shapely-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dea9a0651333cf96ef5bb2035044e3ad6a54f87d90e50fe4c2636debf1b77abc"}, + {file = "shapely-2.0.2-cp39-cp39-win32.whl", hash = "sha256:b8eb0a92f7b8c74f9d8fdd1b40d395113f59bd8132ca1348ebcc1f5aece94b96"}, + {file = "shapely-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:794affd80ca0f2c536fc948a3afa90bd8fb61ebe37fe873483ae818e7f21def4"}, + 
{file = "shapely-2.0.2.tar.gz", hash = "sha256:1713cc04c171baffc5b259ba8531c58acc2a301707b7f021d88a15ed090649e7"}, +] + +[package.dependencies] +numpy = ">=1.14" + +[package.extras] +docs = ["matplotlib", "numpydoc (==1.1.*)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] +test = ["pytest", "pytest-cov"] + [[package]] name = "six" version = "1.16.0" @@ -6030,4 +6160,4 @@ torch = ["torch"] [metadata] lock-version = "2.0" python-versions = "^3.10,<3.12" -content-hash = "00f1fd1fffcb9b3aca6a5420b130fbd05199da2c9b0ea2724da723d2feec2407" +content-hash = "401832a8810aaf4ab5a984435718072b091284135535dcb964ab616d04211ec4" diff --git a/psp/data_sources/nwp.py b/psp/data_sources/nwp.py index 2ee7c57c..1395123c 100644 --- a/psp/data_sources/nwp.py +++ b/psp/data_sources/nwp.py @@ -1,72 +1,20 @@ import datetime as dt +import logging import pathlib import pickle -from typing import Optional, TypeVar +from typing import Optional # This import registers a codec. import ocf_blosc2 # noqa import xarray as xr +from psp.data_sources.utils import _STEP, _TIME, _VALUE, _VARIABLE, _X, _Y, slice_on_lat_lon from psp.gis import CoordinateTransformer from psp.typings import Timestamp from psp.utils.dates import to_pydatetime from psp.utils.hashing import naive_hash -T = TypeVar("T", bound=xr.Dataset | xr.DataArray) - -_X = "x" -_Y = "y" -_TIME = "time" -_STEP = "step" -_VARIABLE = "variable" -_VALUE = "value" - - -def _slice_on_lat_lon( - data: T, - *, - min_lat: float | None = None, - max_lat: float | None = None, - min_lon: float | None = None, - max_lon: float | None = None, - nearest_lat: float | None = None, - nearest_lon: float | None = None, - transformer: CoordinateTransformer, - x_is_ascending: bool, - y_is_ascending: bool, -) -> T: - # Only allow `None` values for lat/lon if they are all None (in which case we don't filter - # by lat/lon). 
- num_none = sum([x is None for x in [min_lat, max_lat, min_lon, max_lon]]) - assert num_none in [0, 4] - - if min_lat is not None: - assert min_lat is not None - assert min_lon is not None - assert max_lat is not None - assert max_lon is not None - - assert max_lat >= min_lat - assert max_lon >= min_lon - - point1, point2 = transformer([(min_lat, min_lon), (max_lat, max_lon)]) - min_x, min_y = point1 - max_x, max_y = point2 - - if not x_is_ascending: - min_x, max_x = max_x, min_x - if not y_is_ascending: - min_y, max_y = max_y, min_y - - # Type ignore because this is still simpler than addin some `@overload`. - return data.sel(x=slice(min_x, max_x), y=slice(min_y, max_y)) # type: ignore - - elif nearest_lat is not None and nearest_lon is not None: - ((x, y),) = transformer([(nearest_lat, nearest_lon)]) - - return data.sel(x=x, y=y, method="nearest") # type: ignore - - return data +_log = logging.getLogger(__name__) class NwpDataSource: @@ -90,8 +38,9 @@ def __init__( y_is_ascending: bool = True, cache_dir: str | None = None, lag_minutes: float = 0.0, - nwp_tolerance: Optional[str] = None, - nwp_variables: Optional[list[str]] = None, + tolerance: Optional[str] = None, + variables: Optional[list[str]] = None, + filter_on_step: Optional[bool] = True, ): """ Arguments: @@ -115,6 +64,7 @@ def __init__( nwp_tolerance: How old should the NWP predictions be before we start ignoring them. See `NwpDataSource.get`'s documentation for details.. nwp_variables: Only use this subset of NWP variables. Defaults to using all. 
+ """ if isinstance(paths_or_data, str): paths_or_data = [paths_or_data] @@ -140,17 +90,21 @@ def __init__( self._lag_minutes = lag_minutes - self._nwp_tolerance = nwp_tolerance - self._nwp_variables = nwp_variables + self._tolerance = tolerance + self._variables = variables self._data = self._prepare_data(raw_data) + self.raw_data = raw_data self._cache_dir = pathlib.Path(cache_dir) if cache_dir else None if self._cache_dir: self._cache_dir.mkdir(exist_ok=True) + self._filter_on_step = filter_on_step + def _open(self, paths: list[str]) -> xr.Dataset: + _log.debug(f"Opening data {paths}") return xr.open_mfdataset( paths, engine="zarr", @@ -176,8 +130,8 @@ def _prepare_data(self, data: xr.Dataset) -> xr.Dataset: data = data.rename(rename_map) # Filter data to keep only the variables in self._nwp_variables if it's not None - if self._nwp_variables is not None: - data = data.sel(variable=self._nwp_variables) + if self._variables is not None: + data = data.sel(variable=self._variables) return data @@ -235,7 +189,7 @@ def get( raise ValueError(f'Timestamp "{t}" should be after now={now}') # Only cache for nearest_* because lat/lon ranges could be big. - use_cache = self._cache_dir and (nearest_lon is not None or nearest_lat is not None) + use_cache = self._cache_dir data = None @@ -246,6 +200,10 @@ def get( now, nearest_lat, nearest_lon, + min_lat, + max_lat, + min_lon, + max_lon, self._paths, self._lag_minutes, tolerance, @@ -260,6 +218,7 @@ def get( # If it was not loaded from the cache, we load it from the original dataset. if data is None: + data = self._get( now=now, timestamps=timestamps, @@ -313,7 +272,7 @@ def _get( assert tolerance is not None return None - ds = _slice_on_lat_lon( + ds = slice_on_lat_lon( ds, min_lat=min_lat, max_lat=max_lat, @@ -331,8 +290,9 @@ def _get( # How long after `time` do we need the predictions. deltas = [t - init_time for t in timestamps] - # Get the nearest prediction to what we are interested in. 
- ds = ds.sel(step=deltas, method="nearest") + if self._filter_on_step: + # Get the nearest prediction to what we are interested in. + ds = ds.sel(step=deltas, method="nearest") da = ds[_VALUE] diff --git a/psp/data_sources/satellite.py b/psp/data_sources/satellite.py new file mode 100644 index 00000000..ba9d03a0 --- /dev/null +++ b/psp/data_sources/satellite.py @@ -0,0 +1,63 @@ +import pyresample +import xarray as xr + +from psp.data_sources.nwp import NwpDataSource +from psp.data_sources.utils import _TIME, _VALUE, _VARIABLE, _X, _Y +from psp.gis import CoordinateTransformer + + +class SatelliteDataSource(NwpDataSource): + def __init__(self, *args, **kwargs): + super().__init__( + *args, + **kwargs, + filter_on_step=False, + x_dim_name="x_geostationary", + y_dim_name="y_geostationary", + value_name="data", + ) + + # Get the coordinate transformer.# get crs + area_definition_yaml = self._data.value.attrs["area"] + geostationary_area_definition = pyresample.area_config.load_area_from_string( + area_definition_yaml + ) + geostationary_crs = geostationary_area_definition.crs + + # Get the coordinate transformer, from lat/lon to geostationary. + self._coordinate_transformer = CoordinateTransformer(from_=4326, to=geostationary_crs) + + def prepare_data(self, data: xr.Dataset) -> xr.Dataset: + # Rename the dimensions. 
+ rename_map: dict[str, str] = {} + for old, new in zip( + [ + self._x_dim_name, + self._y_dim_name, + self._time_dim_name, + self._variable_dim_name, + self._value_name, + ], + [_X, _Y, _TIME, _VARIABLE, _VALUE], + ): + if old != new: + rename_map[old] = new + + data = data.rename(rename_map) + + # Filter data to keep only the variables in self._nwp_variables if it's not None + if self._variables is not None: + data = data.sel(variable=self._variables) + + return data + + def _open(self, paths: list[str]) -> xr.Dataset: + d = xr.open_mfdataset( + paths, + engine="zarr", + concat_dim="time", + combine="nested", + chunks="auto", + join="override", + ) + return d diff --git a/psp/data_sources/utils.py b/psp/data_sources/utils.py new file mode 100644 index 00000000..7637dda4 --- /dev/null +++ b/psp/data_sources/utils.py @@ -0,0 +1,86 @@ +from typing import TypeVar + +import xarray as xr + +from psp.gis import CoordinateTransformer + +_X = "x" +_Y = "y" +_TIME = "time" +_STEP = "step" +_VARIABLE = "variable" +_VALUE = "value" + +T = TypeVar("T", bound=xr.Dataset | xr.DataArray) + + +def slice_on_lat_lon( + data: T, + *, + min_lat: float | None = None, + max_lat: float | None = None, + min_lon: float | None = None, + max_lon: float | None = None, + nearest_lat: float | None = None, + nearest_lon: float | None = None, + transformer: CoordinateTransformer, + x_is_ascending: bool, + y_is_ascending: bool, +) -> T: + """ + Slice the data on lat/lon + + Args: + ---- + data: The data to slice + min_lat: The minimum latitude to slice on + max_lat: The maximum latitude to slice on + min_lon: The minimum longitude to slice on + max_lon: The maximum longitude to slice on + nearest_lat: The latitude to slice on + nearest_lon: The longitude to slice on + transformer: The transformer to use to convert lat/lon to x/y + x_is_ascending: Whether the x values are ascending + y_is_ascending: Whether the y values are ascending + do_average: Take average over the area, of x and y coordinates 
+ """ + # Only allow `None` values for lat/lon if they are all None (in which case we don't filter + # by lat/lon). + num_none = sum([x is None for x in [min_lat, max_lat, min_lon, max_lon]]) + assert num_none in [0, 4] + + if min_lat is not None: + assert min_lat is not None + assert min_lon is not None + assert max_lat is not None + assert max_lon is not None + + assert max_lat >= min_lat + assert max_lon >= min_lon + + # This looks funny because when going from lat/lon to osgb we have to use + # (x, y) = transformer([(lat, lon)]) + # however for lat/lon to geostationary we have to use + # (x_geo, y_geo) = transformer([(lon, lat)]) + + points = [(min_lat, min_lon), (max_lat, max_lon)] + point1, point2 = transformer(points) + min_x, min_y = point1 + max_x, max_y = point2 + + if not x_is_ascending: + min_x, max_x = max_x, min_x + if not y_is_ascending: + min_y, max_y = max_y, min_y + + new_data = data.sel(x=slice(min_x, max_x), y=slice(min_y, max_y)) + + # Type ignore because this is still simpler than adding some `@overload`. 
+ return new_data # type: ignore + + elif nearest_lat is not None and nearest_lon is not None: + ((x, y),) = transformer([(nearest_lat, nearest_lon)]) + + return data.sel(x=x, y=y, method="nearest") # type: ignore + + return data diff --git a/psp/exp_configs/island.py b/psp/exp_configs/island.py index 34781d2c..54243660 100644 --- a/psp/exp_configs/island.py +++ b/psp/exp_configs/island.py @@ -51,7 +51,7 @@ def get_data_source_kwargs(self): x_dim_name="latitude", y_dim_name="longitude", x_is_ascending=False, - nwp_tolerance=None, + tolerance=None, ), "EXC": NwpDataSource( EXC_PATH, @@ -62,7 +62,7 @@ def get_data_source_kwargs(self): x_is_ascending=True, y_is_ascending=True, lag_minutes=8 * 60, - nwp_tolerance=None, + tolerance=None, ), }, ) diff --git a/psp/exp_configs/mone.py b/psp/exp_configs/mone.py index 33db7389..dd236b79 100644 --- a/psp/exp_configs/mone.py +++ b/psp/exp_configs/mone.py @@ -51,7 +51,7 @@ def get_data_source_kwargs(self): value_name="UKV", y_is_ascending=False, # Those are the variables available in our prod environment. - nwp_variables=[ + variables=[ "si10", "vis", # "r2", @@ -64,7 +64,7 @@ def get_data_source_kwargs(self): "mcc", "lcc", ], - nwp_tolerance="168h", + tolerance="168h", ), "EXC": NwpDataSource( EXC_PATH, @@ -75,7 +75,7 @@ def get_data_source_kwargs(self): x_is_ascending=True, y_is_ascending=True, lag_minutes=8 * 60, - nwp_tolerance=None, + tolerance=None, ), }, ) diff --git a/psp/exp_configs/sme.py b/psp/exp_configs/sme.py index a37578e6..3df18ea4 100644 --- a/psp/exp_configs/sme.py +++ b/psp/exp_configs/sme.py @@ -69,7 +69,7 @@ def get_data_source_kwargs(self): value_name="UKV", y_is_ascending=False, # Those are the variables available in our prod environment. 
- nwp_variables=[ + variables=[ "si10", "vis", # "r2", @@ -82,7 +82,7 @@ def get_data_source_kwargs(self): "mcc", "lcc", ], - nwp_tolerance="168h", + tolerance="168h", lag_minutes=4 * 60, ), "EXC": NwpDataSource( @@ -94,7 +94,7 @@ def get_data_source_kwargs(self): x_is_ascending=True, y_is_ascending=True, lag_minutes=7 * 60, - nwp_tolerance="168h", + tolerance="168h", ), "ECMWF": NwpDataSource( ECMWF_PATH, @@ -106,7 +106,7 @@ def get_data_source_kwargs(self): x_is_ascending=True, y_is_ascending=False, lag_minutes=6 * 60, - nwp_tolerance="168h", + tolerance="168h", ), }, ) diff --git a/psp/exp_configs/test_config1.py b/psp/exp_configs/test_config1.py index 63f86d21..1a21d0ca 100644 --- a/psp/exp_configs/test_config1.py +++ b/psp/exp_configs/test_config1.py @@ -7,6 +7,7 @@ from psp.data_sources.nwp import NwpDataSource from psp.data_sources.pv import NetcdfPvDataSource, PvDataSource +from psp.data_sources.satellite import SatelliteDataSource from psp.dataset import DateSplits, PvSplits, TestDateSplit, TrainDateSplit from psp.exp_configs.base import ExpConfigBase from psp.models.base import PvSiteModel, PvSiteModelConfig @@ -16,6 +17,7 @@ PV_DATA_PATH = "psp/tests/fixtures/pv_data.nc" NWP_PATH = "psp/tests/fixtures/nwp.zarr" +SATELLITE_PATH = "psp/tests/fixtures/satellite.zarr" def _get_capacity(d): @@ -48,6 +50,12 @@ def get_data_source_kwargs(self): y_is_ascending=False, ), }, + satellite_data_sources={ + "EUMETSAT": SatelliteDataSource( + SATELLITE_PATH, + x_is_ascending=False, + ), + }, ) def get_model_config(self): @@ -68,7 +76,8 @@ def get_model(self, *, random_state: np.random.RandomState | None = None) -> PvS # Make sure the NWP data is used by adding a lot of dropout on the PV data. 
pv_dropout=0.9, capacity_getter=_get_capacity, - nwp_dropout=0.0 + nwp_dropout=0.0, + satellite_patch_size=0.5, ) def make_pv_splits(self, pv_data_source: PvDataSource) -> PvSplits: diff --git a/psp/exp_configs/uk_pv.py b/psp/exp_configs/uk_pv.py index afd24b5b..0b7608d9 100644 --- a/psp/exp_configs/uk_pv.py +++ b/psp/exp_configs/uk_pv.py @@ -6,6 +6,7 @@ from psp.data_sources.nwp import NwpDataSource from psp.data_sources.pv import NetcdfPvDataSource, PvDataSource +from psp.data_sources.satellite import SatelliteDataSource from psp.dataset import PvSplits, auto_date_split, split_pvs from psp.exp_configs.base import ExpConfigBase from psp.models.base import PvSiteModel, PvSiteModelConfig @@ -26,6 +27,13 @@ for year in range(2018, 2022) ] +SATELLITE_DATA_PATHS = [ + ( + f"gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/{year}_nonhrv.zarr" + ) + for year in range(2018, 2022) +] + # A list of SS_ID that don't contain enough data. # I just didn't want to calculate them everytime. # TODO Get rid of those when we prepare the dataset. @@ -187,7 +195,7 @@ def get_data_source_kwargs(self): y_is_ascending=False, # cache_dir=".nwp_cache", # Those are the variables available in our prod environment. 
- nwp_variables=[ + variables=[ "si10", "vis", # "r2", @@ -202,6 +210,12 @@ def get_data_source_kwargs(self): ], ), }, + satellite_data_sources={ + "EUMETSAT": SatelliteDataSource( + SATELLITE_DATA_PATHS, + x_is_ascending=False, + ), + }, ) def _get_model_config(self) -> PvSiteModelConfig: diff --git a/psp/exp_configs/uk_pv_prod.py b/psp/exp_configs/uk_pv_prod.py index 5aaf47d7..fa83d7a4 100644 --- a/psp/exp_configs/uk_pv_prod.py +++ b/psp/exp_configs/uk_pv_prod.py @@ -181,6 +181,7 @@ def get_pv_data_source(self): def get_data_source_kwargs(self): return dict( pv_data_source=self.get_pv_data_source(), + # add new satelite here nwp_data_source={ "UKV": NwpDataSource( NWP_DATA_PATHS, @@ -190,7 +191,7 @@ def get_data_source_kwargs(self): y_is_ascending=False, cache_dir=".nwp_cache", # Those are the variables available in our prod environment. - nwp_variables=[ + variables=[ "si10", "vis", # "r2", @@ -228,19 +229,17 @@ def get_model(self, *, random_state: np.random.RandomState | None = None) -> PvS def make_pv_splits(self, pv_data_source: PvDataSource) -> PvSplits: return split_pvs(pv_data_source) - def get_date_splits(self): + def get_date_splits(self, step_minutes: int = 1) -> DateSplits: # Train 2 models, one at the beginning of the test range, and one 1 month before the end. # The last one is the one we'll use in production. 
return DateSplits( train_date_splits=[ - TrainDateSplit( - train_date=date, - train_days=365 * 2, - ) + TrainDateSplit(train_date=date, train_days=365 * 2, step_minutes=15) for date in [dt.datetime(2020, 1, 1), dt.datetime(2021, 10, 8)] ], test_date_split=TestDateSplit( start_date=dt.datetime(2020, 1, 1), end_date=dt.datetime(2021, 11, 8), + step_minutes=step_minutes, ), ) diff --git a/psp/models/recent_history.py b/psp/models/recent_history.py index fd630348..4cf0911f 100644 --- a/psp/models/recent_history.py +++ b/psp/models/recent_history.py @@ -10,6 +10,7 @@ from psp.data_sources.nwp import NwpDataSource from psp.data_sources.pv import PvDataSource +from psp.data_sources.satellite import SatelliteDataSource from psp.models.base import PvSiteModel, PvSiteModelConfig from psp.models.regressors.base import Regressor from psp.pv import get_irradiance @@ -125,6 +126,7 @@ def __init__( *, pv_data_source: PvDataSource, nwp_data_sources: dict[str, NwpDataSource], + satellite_data_sources: dict[str, SatelliteDataSource] | None = None, regressor: Regressor, random_state: np.random.RandomState | None = None, pv_dropout: float = 0.0, @@ -136,6 +138,9 @@ def __init__( num_days_history: int = 7, nwp_dropout: float = 0.1, nwp_tolerance: Optional[float] = None, + satellite_dropout: float = 0.1, + satellite_tolerance: Optional[float] = None, + satellite_patch_size: float = 0.25, ): """ Arguments: @@ -156,7 +161,11 @@ def __init__( nwp_dropout: Probability of removing the NWP data (replacing it with np.nan). This is only used at train-time. nwp_tolerance: How old should the NWP predictions be before we start ignoring them. - See `NwpDataSource.get`'s documentation for details.. + See `NwpDataSource.get`'s documentation for details. + satellite_dropout: Probability of removing the satellite data (replacing it with np.nan). + satellite_tolerance: How old should the satellite predictions be before + we start ignoring them. 
+ satellite_patch_size: Size of the patch to use for the satellite data. This is in degrees. """ super().__init__(config) # Validate some options. @@ -168,6 +177,7 @@ def __init__( self._pv_data_source: PvDataSource self._nwp_data_sources: dict[str, NwpDataSource] | None + self._satellite_data_sources: dict[str, SatelliteDataSource] | None self._regressor = regressor self._random_state = random_state @@ -187,10 +197,14 @@ def __init__( self._nwp_dropout = nwp_dropout self._nwp_tolerance = nwp_tolerance + self._satellite_dropout = satellite_dropout + self._satellite_tolerance = satellite_tolerance + self._satellite_patch_size = satellite_patch_size self.set_data_sources( pv_data_source=pv_data_source, nwp_data_sources=nwp_data_sources, + sat_data_sources=satellite_data_sources, ) # We bump this when we make backward-incompatible changes in the code, to support old @@ -204,6 +218,7 @@ def set_data_sources( *, pv_data_source: PvDataSource, nwp_data_sources: dict[str, NwpDataSource] | None = None, + sat_data_sources: dict[str, SatelliteDataSource] | None = None, ): """Set the data sources. 
@@ -211,14 +226,24 @@
         """
         self._pv_data_source = pv_data_source
         self._nwp_data_sources = nwp_data_sources
+        self._satellite_data_sources = sat_data_sources
 
         # This ensures the nwp fixture passed for the test is a dictionary
         if isinstance(self._nwp_data_sources, dict) or self._nwp_data_sources is None:
             pass
-
         else:
             self._nwp_data_sources = dict(nwp_data_source=self._nwp_data_sources)
 
+        # this makes sure the satellite data is a dictionary
+        if (self._satellite_data_sources is not None) and (
+            not isinstance(self._satellite_data_sources, dict)
+        ):
+            self._satellite_data_sources = dict(sat_data_source=self._satellite_data_sources)
+
+        # set this attribute so it works for older models (pickled before satellite support)
+        if not hasattr(self, "_satellite_patch_size"):
+            self._satellite_patch_size = 0
+
     def predict_from_features(self, x: X, features: Features) -> Y:
         powers = self._regressor.predict(features)
         y = Y(powers=powers)
@@ -356,9 +381,8 @@
 
         if self._nwp_data_sources is not None:
             for source_key, source in self._nwp_data_sources.items():
-
-                if source._nwp_tolerance is not None:
-                    tolerance = str(source._nwp_tolerance)
+                if source._tolerance is not None:
+                    tolerance = str(source._tolerance)
                 else:
                     tolerance = None
 
@@ -406,6 +430,90 @@
                     features[variable_source_key] = var_per_horizon
                     features[variable_source_key + "_isnan"] = var_per_horizon_is_nan
 
+        # add another section here for getting the satellite data
+        if self._satellite_data_sources is not None:
+
+            # add the forecast horizon to the features. This is because the satellite data is
+            # only available for the current time step, but not as a forecast, compared to NWP
+            # which are available at all timesteps
+            # note: each horizon is the start and end horizon
+            feature_forecast_horizons = []
+            for horizon in self.config.horizons:
+                feature_forecast_horizons.append((horizon[0] + horizon[1]) / 2.0)
+            features["forecast_horizons"] = np.array(feature_forecast_horizons)
+
+            # loop over satellite sources
+            for source_key, source in self._satellite_data_sources.items():
+                if source._tolerance is not None:
+                    tolerance = str(source._tolerance)
+                else:
+                    tolerance = None
+
+                if (
+                    is_training
+                    and self._satellite_dropout > 0.0
+                    and self._random_state is not None
+                    and self._random_state.random() < self._satellite_dropout
+                ):
+                    satellite_data = None
+                else:
+
+                    if self._satellite_patch_size > 0:
+                        satellite_data = source.get(
+                            now=x.ts,
+                            timestamps=horizon_timestamps,
+                            min_lat=lat - self._satellite_patch_size / 2,
+                            max_lat=lat + self._satellite_patch_size / 2,
+                            min_lon=lon - self._satellite_patch_size / 2,
+                            max_lon=lon + self._satellite_patch_size / 2,
+                            nearest_lon=lon,
+                            tolerance=tolerance,
+                        )
+
+                        # take mean over x and y
+                        if satellite_data is not None:
+                            satellite_data = satellite_data.mean(dim=["x", "y"])
+
+                    else:
+                        satellite_data = source.get(
+                            now=x.ts,
+                            timestamps=horizon_timestamps,
+                            nearest_lat=lat,
+                            nearest_lon=lon,
+                            tolerance=tolerance,
+                        )
+                satellite_variables = source.list_variables()
+
+                for variable in satellite_variables:
+                    # Deal with the trivial case where the returned satellite data is simply `None`.
+                    # This happens if there wasn't any data for the given tolerance.
+                    if satellite_data is not None:
+                        var = satellite_data.sel(variable=variable).values
+
+                        # expand satellite data to all time steps
+                        var_per_horizon = np.array([var for _ in self.config.horizons])
+
+                    else:
+                        var_per_horizon = np.array([np.nan for _ in self.config.horizons])
+
+                    # Deal with potential NaN values in the satellite data.
+ var_per_horizon_is_nan = np.isnan(var_per_horizon) * 1.0 + var_per_horizon = np.nan_to_num( + var_per_horizon, nan=0.0, posinf=0.0, neginf=0.0 + ) + + # We only want to append the name of the Satellite variable to include the + # provider + # if there are multiple Satellite data sources + if len(self._satellite_data_sources) > 1: + variable_source_key = variable + source_key + + else: + variable_source_key = variable + + features[variable_source_key] = var_per_horizon + features[variable_source_key + "_isnan"] = var_per_horizon_is_nan + # Get the recent power. recent_power = float( data.sel(ts=slice(x.ts - timedelta(minutes=recent_power_minutes), x.ts)).mean() @@ -486,6 +594,7 @@ def get_state(self): # for multiprocessing. del state["_pv_data_source"] del state["_nwp_data_sources"] + del state["_satellite_data_sources"] return state def set_state(self, state): diff --git a/psp/tests/conftest.py b/psp/tests/conftest.py index da0ed12a..e93c3802 100644 --- a/psp/tests/conftest.py +++ b/psp/tests/conftest.py @@ -2,6 +2,7 @@ from psp.data_sources.nwp import NwpDataSource from psp.data_sources.pv import NetcdfPvDataSource +from psp.data_sources.satellite import SatelliteDataSource @pytest.fixture @@ -36,3 +37,13 @@ def nwp_data_sources(pv_data_source): y_is_ascending=False, ), } + + +@pytest.fixture +def satellite_data_sources(pv_data_source): + return { + "EUMETSAT": SatelliteDataSource( + "psp/tests/fixtures/satellite.zarr", + x_is_ascending=False, + ), + } diff --git a/psp/tests/data/data_sources/test_nwp_data_source.py b/psp/tests/data/data_sources/test_nwp_data_source.py index b180f9d3..7370a95c 100644 --- a/psp/tests/data/data_sources/test_nwp_data_source.py +++ b/psp/tests/data/data_sources/test_nwp_data_source.py @@ -157,7 +157,6 @@ def test_nwp_data_source_check_times_many_steps( ], ) def test_nwp_data_source_space(reverse_x, reverse_y, lat, lon, expected_size, nwp_data_sources): - # Reverse the data order and see if our flag words. 
if reverse_x: nwp_data_sources._data = nwp_data_sources._data.sortby("x", ascending=False) diff --git a/psp/tests/data/data_sources/test_satellite_data_source.py b/psp/tests/data/data_sources/test_satellite_data_source.py new file mode 100644 index 00000000..317e9630 --- /dev/null +++ b/psp/tests/data/data_sources/test_satellite_data_source.py @@ -0,0 +1,38 @@ +from datetime import datetime + +import ocf_blosc2 # noqa +import xarray as xr + +from psp.data_sources.satellite import SatelliteDataSource + + +def test_satellite_data_source(): + """Test loading the satellite data + + Note this test uses the satellite public dataset to get this data. + It can take about 30 seconds to run + + """ + # this is for the google datasets + paths = [ + "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr" + ] + + sat = SatelliteDataSource(paths_or_data=paths, x_is_ascending=False) + + now = datetime(2021, 2, 1) + lat = 50 + lon = 0 + + example = sat.get(now=now, timestamps=[now], nearest_lat=lat, nearest_lon=lon) + + assert isinstance(example, xr.DataArray) + assert example.x.size > 0 + assert example.y.size > 0 + + example = sat.get( + now=now, timestamps=[now], max_lat=lat + 1, min_lat=lat, max_lon=lon + 1, min_lon=lon + ) + assert isinstance(example, xr.DataArray) + assert example.x.size > 0 + assert example.y.size > 0 diff --git a/psp/tests/data/data_sources/test_utils.py b/psp/tests/data/data_sources/test_utils.py new file mode 100644 index 00000000..7a873126 --- /dev/null +++ b/psp/tests/data/data_sources/test_utils.py @@ -0,0 +1,41 @@ +from psp.data_sources.utils import slice_on_lat_lon +from psp.gis import CoordinateTransformer + + +def test_slice_on_max_min(nwp_data_sources): + + transformer = CoordinateTransformer(from_=4326, to=27700) + + data = nwp_data_sources["UKV"]._data + print(data) + new_data = slice_on_lat_lon( + data=data, + max_lat=55, + min_lat=48, + max_lon=-1, + min_lon=-4, + transformer=transformer, + 
y_is_ascending=False, + x_is_ascending=True, + ) + + assert new_data.x.size == 2 + assert new_data.y.size == 2 + + +def test_slice_on_nearest(nwp_data_sources): + + transformer = CoordinateTransformer(from_=4326, to=27700) + + data = nwp_data_sources["UKV"]._data + new_data = slice_on_lat_lon( + data=data, + nearest_lat=52, + nearest_lon=-2, + transformer=transformer, + y_is_ascending=False, + x_is_ascending=True, + ) + + assert new_data.x.size == 1 + assert new_data.y.size == 1 diff --git a/psp/tests/fixtures/README.md b/psp/tests/fixtures/README.md index ffab29c0..cf5ee5d9 100644 --- a/psp/tests/fixtures/README.md +++ b/psp/tests/fixtures/README.md @@ -8,7 +8,7 @@ import datetime as dt import ocf_blosc2 import xarray as xr -from psp.data_sources.nwp import _slice_on_lat_lon +from psp.data_sources.utils import slice_on_lat_lon from psp.gis import CoordinateTransformer nwp = xr.open_dataset( @@ -16,7 +16,7 @@ nwp = xr.open_dataset( "/UK_Met_Office/UKV/zarr/UKV_2020_NWP.zarr" ) -nwp = _slice_on_lat_lon( +nwp = slice_on_lat_lon( nwp, min_lat=51.1, max_lat=53.7, @@ -52,3 +52,49 @@ nwp.to_zarr( encoding={"UKV": {"chunks": [chunks[d] for d in nwp.dims]}}, ) ``` + +The satellite data has been generated with +```python +import datetime as dt + +import ocf_blosc2 +import xarray as xr +from psp.data_sources.satellite import SatelliteDataSource +from psp.data_sources.utils import slice_on_lat_lon +from psp.gis import CoordinateTransformer + +paths = [ + "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr" +] + +sat = SatelliteDataSource(paths_or_data=paths, x_is_ascending=False) + +x_min, y_min = sat.lonlat_to_geostationary(xx=-3.3, yy=51.1) +x_max, y_max = sat.lonlat_to_geostationary(xx=-2.7, yy=53.7) +d = sat._data +sat = sat._data + + +sat = sat.sel(variable=["IR_016", "IR_039", "IR_087", "IR_097"]) + +sat = sat.sel(x=slice(x_max, x_min)) +sat = sat.sel(y=slice(y_min, y_max)) +sat = sat.sel(time=slice(dt.datetime(2020, 1, 1), 
dt.datetime(2020, 1, 14))) + + +# rename back to old variables +sat = sat.rename({'x':'x_geostationary', 'y':'y_geostationary', 'value':'data'}) + +chunks = { + "time": -1, + "x_geostationary": -1, + "y_geostationary": -1, + "variable": -1, +} + +sat.to_zarr( + "psp/tests/fixtures/satellite.zarr", + mode="w", + encoding={"data": {"chunks": [chunks[d] for d in sat.dims]}}, +) +``` \ No newline at end of file diff --git a/psp/tests/fixtures/models/model_v9.pkl b/psp/tests/fixtures/models/model_v9.pkl new file mode 100644 index 00000000..ae3339d1 Binary files /dev/null and b/psp/tests/fixtures/models/model_v9.pkl differ diff --git a/psp/tests/fixtures/satellite.zarr/.zattrs b/psp/tests/fixtures/satellite.zarr/.zattrs new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/psp/tests/fixtures/satellite.zarr/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/psp/tests/fixtures/satellite.zarr/.zgroup b/psp/tests/fixtures/satellite.zarr/.zgroup new file mode 100644 index 00000000..3b7daf22 --- /dev/null +++ b/psp/tests/fixtures/satellite.zarr/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/psp/tests/fixtures/satellite.zarr/.zmetadata b/psp/tests/fixtures/satellite.zarr/.zmetadata new file mode 100644 index 00000000..d3eea187 --- /dev/null +++ b/psp/tests/fixtures/satellite.zarr/.zmetadata @@ -0,0 +1,408 @@ +{ + "metadata": { + ".zattrs": {}, + ".zgroup": { + "zarr_format": 2 + }, + "data/.zarray": { + "chunks": [ + 1549, + 45, + 30, + 4 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": "