Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add variable mapping of psm3 #1374

Merged
merged 20 commits into from
Mar 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/sphinx/source/whatsnew/v0.9.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ Deprecations

Enhancements
~~~~~~~~~~~~
* Added ``map_variables`` option to :py:func:`pvlib.iotools.get_psm3` and
:py:func:`pvlib.iotools.read_psm3` (:pull:`1374`)
* Added `pvlib.bifacial.infinite_sheds`, containing a model for irradiance
on front and back surfaces of bifacial arrays. (:pull:`717`)
* Added ``map_variables`` option to :func:`~pvlib.iotools.read_crn` (:pull:`1368`)
Expand Down
79 changes: 65 additions & 14 deletions pvlib/iotools/psm3.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

"""
Get PSM3 TMY
see https://developer.nrel.gov/docs/solar/nsrdb/psm3_data_download/
Expand All @@ -8,6 +7,8 @@
import requests
import pandas as pd
from json import JSONDecodeError
import warnings
from pvlib._deprecation import pvlibDeprecationWarning

NSRDB_API_BASE = "https://developer.nrel.gov"
PSM_URL = NSRDB_API_BASE + "/api/nsrdb/v2/solar/psm3-download.csv"
Expand All @@ -20,12 +21,31 @@
'surface_pressure', 'wind_direction', 'wind_speed')
PVLIB_PYTHON = 'pvlib python'

# Dictionary mapping PSM3 column names to pvlib variable names.
# Keys must match the PSM3 column headers exactly (the lookup is
# case-sensitive) because the map is applied to the parsed data with
# ``DataFrame.rename(columns=VARIABLE_MAP)``.  The same map is also reversed
# (key lower-cased, spaces replaced by underscores) to translate pvlib names
# passed in as request attributes.
VARIABLE_MAP = {
    'GHI': 'ghi',
    'DHI': 'dhi',
    'DNI': 'dni',
    'Clearsky GHI': 'ghi_clear',
    'Clearsky DHI': 'dhi_clear',
    'Clearsky DNI': 'dni_clear',
    'Solar Zenith Angle': 'solar_zenith',
    'Temperature': 'temp_air',
    'Relative Humidity': 'relative_humidity',
    # The column header is 'Dew Point' (capital P); the previous key
    # 'Dew point' never matched, so the column was silently left unmapped.
    'Dew Point': 'temp_dew',
    'Pressure': 'pressure',
    'Wind Direction': 'wind_direction',
    'Wind Speed': 'wind_speed',
    'Surface Albedo': 'albedo',
    'Precipitable Water': 'precipitable_water',
}


def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
attributes=ATTRIBUTES, leap_day=False, full_name=PVLIB_PYTHON,
affiliation=PVLIB_PYTHON, timeout=30):
affiliation=PVLIB_PYTHON, map_variables=None, timeout=30):
"""
Retrieve NSRDB PSM3 timeseries weather data from the PSM3 API. The NSRDB
Retrieve NSRDB PSM3 timeseries weather data from the PSM3 API. The NSRDB
is described in [1]_ and the PSM3 API is described in [2]_, [3]_, and [4]_.

.. versionchanged:: 0.9.0
Expand All @@ -48,19 +68,23 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
PSM3 API parameter specifying year or TMY variant to download, see notes
below for options
interval : int, {60, 5, 15, 30}
interval size in minutes, must be 5, 15, 30 or 60. Only used for
interval size in minutes, must be 5, 15, 30 or 60. Only used for
single-year requests (i.e., it is ignored for tmy/tgy/tdy requests).
attributes : list of str, optional
meteorological fields to fetch. If not specified, defaults to
``pvlib.iotools.psm3.ATTRIBUTES``. See references [2]_, [3]_, and [4]_
for lists of available fields.
for lists of available fields. Alternatively, pvlib names may also be
used (e.g. 'ghi' rather than 'GHI'); see :const:`VARIABLE_MAP`.
leap_day : boolean, default False
include leap day in the results. Only used for single-year requests
include leap day in the results. Only used for single-year requests
(i.e., it is ignored for tmy/tgy/tdy requests).
full_name : str, default 'pvlib python'
optional
affiliation : str, default 'pvlib python'
optional
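map_variables: boolean, optional
When true, renames columns of the DataFrame to pvlib variable names
where applicable, and renames the metadata keys ``Latitude``,
``Longitude`` and ``Elevation`` to ``latitude``, ``longitude`` and
``altitude``. If not specified, a ``pvlibDeprecationWarning`` is issued
and no renaming is done. See variable :const:`VARIABLE_MAP`.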
timeout : int, default 30
time in seconds to wait for server response before timeout

Expand Down Expand Up @@ -96,14 +120,15 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
+===========+=============================================================+
| Year | 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, |
| | 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, |
| | 2018, 2019 |
| | 2018, 2019, 2020 |
+-----------+-------------------------------------------------------------+
| TMY | tmy, tmy-2016, tmy-2017, tdy-2017, tgy-2017, |
| | tmy-2018, tdy-2018, tgy-2018, tmy-2019, tdy-2019, tgy-2019 |
| | tmy-2020, tdy-2020, tgy-2020 |
+-----------+-------------------------------------------------------------+

.. warning:: PSM3 is limited to data found in the NSRDB, please consult the
references below for locations with available data. Additionally,
references below for locations with available data. Additionally,
querying data with < 30-minute resolution uses a different API endpoint
with fewer available fields (see [4]_).

Expand Down Expand Up @@ -133,6 +158,13 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
# convert to string to accommodate integer years being passed in
names = str(names)

# convert pvlib names in attributes to psm3 convention (reverse mapping)
# unlike psm3 columns, attributes are lower case and with underscores
amap = {value: key.lower().replace(' ', '_') for (key, value) in
VARIABLE_MAP.items()}
attributes = [amap.get(a, a) for a in attributes]
attributes = list(set(attributes)) # remove duplicate values
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is interesting. Do we do "reverse mapping" in any other iotools functions? It is unfortunate that the PSM3 API's input parameter names are different from the output column names.

Maybe clearer for the second line, up to you: attributes = [amap.get(a, a) for a in attributes]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we keep this, it should probably be mentioned in the docstring

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot immediately think of any other function where reverse mapping would make sense. Pretty nifty though! And I think it is in line with the spirit of the iotools, in that it conforms the data access interface with pvlib conventions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AdamRJensen should we add a sentence to the attributes docstring description for this? Something like (feel free to edit):

Alternatively, pvlib names may also be used (e.g. 'ghi' rather than 'GHI'); see :const:PSM3_VARIABLE_MAP.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're referencing the iotools variable maps via :const: then we should think about giving them their own entries in the docs. Something like what we did for pvlib.temperature.TEMPERATURE_MODEL_PARAMETERS maybe? Let's do that in a separate issue though.


# required query-string parameters for request to PSM3 API
params = {
'api_key': api_key,
Expand Down Expand Up @@ -167,12 +199,12 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
# the CSV is in the response content as a UTF-8 bytestring
# to use pandas we need to create a file buffer from the response
fbuf = io.StringIO(response.content.decode('utf-8'))
return parse_psm3(fbuf)
return parse_psm3(fbuf, map_variables)


def parse_psm3(fbuf):
def parse_psm3(fbuf, map_variables=None):
"""
Parse an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB
Parse an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB
is described in [1]_ and the SAM CSV format is described in [2]_.

.. versionchanged:: 0.9.0
Expand All @@ -184,6 +216,9 @@ def parse_psm3(fbuf):
----------
fbuf: file-like object
File-like object containing data to read.
map_variables: bool, optional
When true, renames columns of the DataFrame to pvlib variable names
where applicable. If not specified, a ``pvlibDeprecationWarning`` is
issued and no renaming is done. See variable VARIABLE_MAP.

Returns
-------
Expand Down Expand Up @@ -296,12 +331,25 @@ def parse_psm3(fbuf):
tz = 'Etc/GMT%+d' % -metadata['Time Zone']
data.index = pd.DatetimeIndex(dtidx).tz_localize(tz)

if map_variables is None:
AdamRJensen marked this conversation as resolved.
Show resolved Hide resolved
warnings.warn(
'PSM3 variable names will be renamed to pvlib conventions by '
'default starting in pvlib 0.11.0. Specify map_variables=True '
'to enable that behavior now, or specify map_variables=False '
'to hide this warning.', pvlibDeprecationWarning)
map_variables = False
if map_variables:
data = data.rename(columns=VARIABLE_MAP)
metadata['latitude'] = metadata.pop('Latitude')
metadata['longitude'] = metadata.pop('Longitude')
metadata['altitude'] = metadata.pop('Elevation')

return data, metadata


def read_psm3(filename):
def read_psm3(filename, map_variables=None):
"""
Read an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB
Read an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB
is described in [1]_ and the SAM CSV format is described in [2]_.

.. versionchanged:: 0.9.0
Expand All @@ -313,6 +361,9 @@ def read_psm3(filename):
----------
filename: str
Filename of a file containing data to read.
map_variables: bool, optional
When true, renames columns of the DataFrame to pvlib variable names
where applicable. If not specified, a ``pvlibDeprecationWarning`` is
issued and no renaming is done. See variable VARIABLE_MAP.

Returns
-------
Expand All @@ -334,5 +385,5 @@ def read_psm3(filename):
<https://web.archive.org/web/20170207203107/https://sam.nrel.gov/sites/default/files/content/documents/pdf/wfcsv.pdf>`_
"""
with open(str(filename), 'r') as fbuf:
content = parse_psm3(fbuf)
content = parse_psm3(fbuf, map_variables)
return content
56 changes: 48 additions & 8 deletions pvlib/tests/iotools/test_psm3.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@

import os
from pvlib.iotools import psm3
from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY
from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal
import numpy as np
import pandas as pd
import pytest
from requests import HTTPError
from io import StringIO
import warnings
from pvlib._deprecation import pvlibDeprecationWarning

TMY_TEST_DATA = DATA_DIR / 'test_psm3_tmy-2017.csv'
YEAR_TEST_DATA = DATA_DIR / 'test_psm3_2017.csv'
Expand Down Expand Up @@ -76,7 +77,8 @@ def assert_psm3_equal(data, metadata, expected):
def test_get_psm3_tmy(nrel_api_key):
"""test get_psm3 with a TMY"""
data, metadata = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key,
PVLIB_EMAIL, names='tmy-2017')
PVLIB_EMAIL, names='tmy-2017',
map_variables=False)
expected = pd.read_csv(TMY_TEST_DATA)
assert_psm3_equal(data, metadata, expected)

Expand All @@ -86,7 +88,8 @@ def test_get_psm3_tmy(nrel_api_key):
def test_get_psm3_singleyear(nrel_api_key):
"""test get_psm3 with a single year"""
data, metadata = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key,
PVLIB_EMAIL, names='2017', interval=30)
PVLIB_EMAIL, names='2017',
map_variables=False, interval=30)
expected = pd.read_csv(YEAR_TEST_DATA)
assert_psm3_equal(data, metadata, expected)

Expand All @@ -96,7 +99,8 @@ def test_get_psm3_singleyear(nrel_api_key):
def test_get_psm3_5min(nrel_api_key):
"""test get_psm3 for 5-minute data"""
data, metadata = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key,
PVLIB_EMAIL, names='2019', interval=5)
PVLIB_EMAIL, names='2019', interval=5,
map_variables=False)
assert len(data) == 525600/5
first_day = data.loc['2019-01-01']
expected = pd.read_csv(YEAR_TEST_DATA_5MIN)
Expand All @@ -108,7 +112,7 @@ def test_get_psm3_5min(nrel_api_key):
def test_get_psm3_check_leap_day(nrel_api_key):
data_2012, _ = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key,
PVLIB_EMAIL, names="2012", interval=60,
leap_day=True)
leap_day=True, map_variables=False)
assert len(data_2012) == (8760 + 24)


Expand All @@ -133,7 +137,7 @@ def test_get_psm3_tmy_errors(
"""
with pytest.raises(HTTPError) as excinfo:
psm3.get_psm3(latitude, longitude, api_key, PVLIB_EMAIL,
names=names, interval=interval)
names=names, interval=interval, map_variables=False)
# ensure the HTTPError caught isn't due to overuse of the API key
assert "OVER_RATE_LIMIT" not in str(excinfo.value)

Expand All @@ -149,13 +153,49 @@ def io_input(request):

def test_parse_psm3(io_input):
"""test parse_psm3"""
data, metadata = psm3.parse_psm3(io_input)
data, metadata = psm3.parse_psm3(io_input, map_variables=False)
expected = pd.read_csv(YEAR_TEST_DATA)
assert_psm3_equal(data, metadata, expected)


def test_read_psm3():
"""test read_psm3"""
data, metadata = psm3.read_psm3(MANUAL_TEST_DATA)
data, metadata = psm3.read_psm3(MANUAL_TEST_DATA, map_variables=False)
expected = pd.read_csv(YEAR_TEST_DATA)
assert_psm3_equal(data, metadata, expected)


def test_read_psm3_map_variables():
"""test read_psm3 map_variables=True"""
data, metadata = psm3.read_psm3(MANUAL_TEST_DATA, map_variables=True)
columns_mapped = ['Year', 'Month', 'Day', 'Hour', 'Minute', 'dhi', 'dni',
'ghi', 'dhi_clear', 'dni_clear', 'ghi_clear',
'Cloud Type', 'Dew Point', 'apparent_zenith',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AdamRJensen this test is failing locally for me because of an apparent_zenith/solar_zenith difference. Shouldn't this be solar_zenith, and shouldn't this test have failed before we merged the PR? I'm very confused why this wasn't failing before.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes indeed. Sorry for the slow response. Thanks for the fix!

'Fill Flag', 'albedo', 'wind_speed',
'precipitable_water', 'wind_direction',
'relative_humidity', 'temp_air', 'pressure']
data, metadata = psm3.read_psm3(MANUAL_TEST_DATA, map_variables=True)
assert_index_equal(data.columns, pd.Index(columns_mapped))


@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_psm3_attribute_mapping(nrel_api_key):
    """Request data using pvlib-style attribute names and verify that the
    pvlib-named columns and metadata keys are present in the result."""
    requested = ['ghi', 'wind_speed']
    weather, info = psm3.get_psm3(
        LATITUDE, LONGITUDE, nrel_api_key, PVLIB_EMAIL,
        names=2019, interval=60,
        attributes=requested,
        map_variables=True)
    # the requested pvlib names must come back as column names
    for column in requested:
        assert column in weather.columns
    # with map_variables=True the location metadata keys are renamed too
    for key in ('latitude', 'longitude', 'altitude'):
        assert key in info.keys()


@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_psm3_variable_map_deprecation_warning(nrel_api_key):
    """Reading a PSM3 file without specifying map_variables must emit a
    pvlibDeprecationWarning about the upcoming renaming of variables."""
    # NOTE(review): the nrel_api_key fixture is not used in the body and the
    # data is read from a local file; the remote_data mark may be
    # unnecessary -- confirm.
    expected_message = 'names will be renamed'
    with pytest.warns(pvlibDeprecationWarning, match=expected_message):
        psm3.read_psm3(MANUAL_TEST_DATA)