Skip to content
This repository has been archived by the owner on Jan 26, 2022. It is now read-only.

Commit

Permalink
Automatically detect and fill bad values in CDF files
Browse files Browse the repository at this point in the history
  • Loading branch information
dstansby committed Feb 8, 2020
1 parent 06699e5 commit d2188c2
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 30 deletions.
2 changes: 0 additions & 2 deletions heliopy/data/ace.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@ def _ace(starttime, endtime, identifier, warn_missing_units=True):
"""
Generic method for downloading ACE data.
"""
badvalues = 1e-31
return cdasrest.CDASDwonloader('ac', identifier, 'ace',
badvalues=badvalues,
warn_missing_units=warn_missing_units)


Expand Down
6 changes: 2 additions & 4 deletions heliopy/data/helios.py
Original file line number Diff line number Diff line change
Expand Up @@ -1095,21 +1095,19 @@ def _docstring(identifier, extra):
return cdasrest._docstring(identifier, 'M', extra)


def _helios(starttime, endtime, identifier, units=None, badvalues=None,
def _helios(starttime, endtime, identifier, units=None,
warn_missing_units=True):
"""
Generic method for downloading Helios data from CDAWeb.
"""
dl = cdasrest.CDASDwonloader('helios', identifier, 'helios', units=units,
badvalues=badvalues,
warn_missing_units=warn_missing_units)
return dl.load(starttime, endtime)


def merged(probe, starttime, endtime):
identifier = f'HELIOS{probe}_40SEC_MAG-PLASMA'
badvalues = [-1e31]
return _helios(starttime, endtime, identifier, badvalues=badvalues,
return _helios(starttime, endtime, identifier,
warn_missing_units=False)


Expand Down
3 changes: 1 addition & 2 deletions heliopy/data/imp.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,12 @@ def merged(probe, starttime, endtime):
return dl.load(starttime, endtime)


def _imp8(starttime, endtime, identifier, units=None, badvalues=None,
def _imp8(starttime, endtime, identifier, units=None,
warn_missing_units=True):
"""
Generic method for downloading IMP8 data.
"""
dl = cdasrest.CDASDwonloader('imp8', identifier, 'imp', units=units,
badvalues=badvalues,
warn_missing_units=warn_missing_units)
return dl.load(starttime, endtime)

Expand Down
6 changes: 2 additions & 4 deletions heliopy/data/psp.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,11 @@ def download(self, interval):
def load_local_file(self, interval):
local_path = self.local_path(interval)
cdf = util._load_cdf(local_path)
return util.cdf2df(
cdf, index_key=self.epoch_label, badvalues=self.badvalues)
return util.cdf2df(cdf, index_key=self.epoch_label)


# SWEAP classes/methods
class _SWEAPDownloader(_PSPDownloader):
badvalues = [-1e31]
units = {'u/e': u.dimensionless_unscaled}
# Fill in some missing units
for i in range(3):
Expand Down Expand Up @@ -81,7 +79,7 @@ def sweap_spc_l3(starttime, endtime):

# FIELDS classes/methods
class _FIELDSDownloader(_PSPDownloader):
badvalues = None
pass


class _FIELDSmag_RTN_1min_Downloader(_FIELDSDownloader):
Expand Down
28 changes: 18 additions & 10 deletions heliopy/data/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ def cdf_units(cdf_, manual_units=None, length=None):
val = []
val.append(key)
for x in range(0, ncols[1]):
field = key + "{}".format('_' + str(x))
field = f'{key}_{x}'
val.append(field)
key_dict[key] = val

Expand Down Expand Up @@ -732,9 +732,7 @@ def cdf2df(cdf, index_key, dtimeindex=True, badvalues=None,
If ``True``, the DataFrame index is parsed as a datetime.
Default is ``True``.
badvalues : dict, list, optional
A dictionary that maps the new DataFrame column keys to a list of bad
values to replace with nans. Alternatively a list of numbers which are
replaced with nans in all columns.
Deprecated.
ignore : list, optional
In case a CDF file has columns that are unused / not required, then
the column names can be passed as a list into the function.
Expand All @@ -748,6 +746,10 @@ def cdf2df(cdf, index_key, dtimeindex=True, badvalues=None,
df : :class:`pandas.DataFrame`
Data frame with read in data.
"""
if badvalues is not None:
warnings.warn('The badvalues argument is deprecated, as bad values '
'are now automatically recognised using the FILLVAL CDF '
'attribute.', DeprecationWarning)
if include is not None:
if ignore is not None:
raise ValueError('ignore and include are incompatible keywords')
Expand Down Expand Up @@ -818,23 +820,29 @@ def cdf2df(cdf, index_key, dtimeindex=True, badvalues=None,
# Loop through each key and put data into the dataframe
for cdf_key in vars:
df_key = keys[cdf_key]
# Get fill value for this key
fillval = cdf.varattsget(cdf_key)['FILLVAL']

if isinstance(df_key, list):
for i, subkey in enumerate(df_key):
df[subkey] = vars[cdf_key][...][:, i]
data = vars[cdf_key][...][:, i].astype(float)
data[data == fillval] = np.nan
df[subkey] = data
else:
# If ndims is 1, we just have a single column of data
# If ndims is 2, have multiple columns of data under same key
key_shape = vars[cdf_key].shape
ndims = len(key_shape)
if ndims == 1:
df[df_key] = vars[cdf_key][...]
data = vars[cdf_key][...].astype(float)
data[data == fillval] = np.nan
df[df_key] = data
elif ndims == 2:
for i in range(key_shape[1]):
df[df_key + '_' + str(i)] = vars[cdf_key][...][:, i]
data = vars[cdf_key][...][:, i].astype(float)
data[data == fillval] = np.nan
df[df_key + '_' + str(i)] = data

# Replace bad values with nans
if badvalues is not None:
df = df.replace(badvalues, np.nan)
return df


Expand Down
12 changes: 4 additions & 8 deletions heliopy/data/wind.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@ def _docstring(identifier, description):
return cdasrest._docstring(identifier, 'W', description)


def _wind(starttime, endtime, identifier, badvalues=None, units=None,
def _wind(starttime, endtime, identifier, units=None,
intervals='monthly'):
"""
Generic method for downloading Wind data.
"""
dl = cdasrest.CDASDwonloader('wi', identifier, 'wind', badvalues=badvalues,
units=units)
dl = cdasrest.CDASDwonloader('wi', identifier, 'wind', units=units)
# Override intervals
if intervals == 'daily':
dl.intervals = dl.intervals_daily
Expand All @@ -27,10 +26,8 @@ def _wind(starttime, endtime, identifier, badvalues=None, units=None,
# Actual download functions start here
def swe_h1(starttime, endtime):
identifier = 'WI_H1_SWE'
badvalues = 99999.9
units = {'ChisQ_DOF_nonlin': u.dimensionless_unscaled}
return _wind(starttime, endtime, identifier,
badvalues=badvalues, units=units)
return _wind(starttime, endtime, identifier, units=units)


swe_h1.__doc__ = _docstring(
Expand Down Expand Up @@ -76,9 +73,8 @@ def threedp_e0_emfits(starttime, endtime):


def swe_h3(starttime, endtime):
badvalues = -9.99999985e+30
identifier = 'WI_H3_SWE'
return _wind(starttime, endtime, identifier, badvalues=badvalues)
return _wind(starttime, endtime, identifier)


swe_h3.__doc__ = _docstring(
Expand Down

0 comments on commit d2188c2

Please sign in to comment.