From fa7e4a0736e41cdca27622437e79150d061f25f7 Mon Sep 17 00:00:00 2001
From: Steven Suttles
Date: Fri, 11 Oct 2024 13:56:27 -0400
Subject: [PATCH] Add filtering/smoothing options (#261)

* add signal filtering option

* add filters updates and tests

* add filters clean-up

* filter order options

* fix code comments

---------

Co-authored-by: ssuttles-usgs
---
 doc/config.rst                             |    8 +
 stglib/__init__.py                         |    2 +-
 stglib/aqd/aqdutils.py                     |    5 +-
 stglib/aqd/hrcdf2nc.py                     |    6 +-
 stglib/core/filter.py                      |  207 +++
 stglib/rdi/rdiadcpy.py                     | 1744 ++++++++++++--------
 stglib/rsk/cdf2nc.py                       |    6 +-
 stglib/sg.py                               |    6 +-
 stglib/sig/cdf2nc.py                       |   35 +-
 stglib/tests/data/config_1126vec14823.yaml |    4 +-
 stglib/tests/data/sig1126_config.yaml      |    4 +
 stglib/troll.py                            |    8 +-
 stglib/vec/cdf2nc.py                       |    7 +-
 13 files changed, 1317 insertions(+), 725 deletions(-)
 create mode 100644 stglib/core/filter.py

diff --git a/doc/config.rst b/doc/config.rst
index 739a2155..c0005903 100644
--- a/doc/config.rst
+++ b/doc/config.rst
@@ -82,6 +82,14 @@ Options applicable to many instrument types include:
 - ``_mask``: a single variable or list of variables which should be used to fill the given variable. For example ``u_1205_mask: ["cor1_1285", "cor2_1286", "cor3_1287"]`` will set ``u_1205`` to ``_FillValue`` wherever the correlation variables are ``_FillValue``
 - ``drop_vars``: a list of variables to be removed from the final file. For example, ``drop_vars: ['nLF_Cond_µS_per_cm', 'Wiper_Position_volt', 'Cable_Pwr_V']``.
 
+Options for signal filtering:
+
+- ``_lowpass_filt``: apply a Butterworth lowpass filter with the specified cutoff period in seconds.
+- ``_highpass_filt``: apply a Butterworth highpass filter with the specified cutoff period in seconds.
+- ``_bandpass_filt``: apply a Butterworth bandpass filter with cutoff periods in seconds given as a two-element list ``[cut_long, cut_short]``.
+- ``_med_filt``: apply an n-point median filter, where n is the specified value (must be an odd number).
+- ``filter_order``: specify the order of the Butterworth filter (default = 4 if not specified).
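+
+A minimal sketch of how these options are exercised (hypothetical values; the
+config keys are copied into the dataset's global attributes, which the new
+``stglib.core.filter`` helpers then read)::
+
+    import numpy as np
+    import pandas as pd
+    import xarray as xr
+
+    from stglib.core import filter as stgfilter
+
+    # hypothetical 1 Hz pressure record: slow oscillation plus noise
+    time = pd.date_range("2024-10-11", periods=3600, freq="s")
+    p = 10 + 0.5 * np.sin(2 * np.pi * np.arange(3600) / 600) + 0.05 * np.random.randn(3600)
+    ds = xr.Dataset({"P_1": ("time", p)}, coords={"time": time})
+
+    ds.attrs["sample_rate"] = 1  # Hz; a sample_interval (s) attribute also works
+    ds.attrs["P_1_lowpass_filt"] = 60  # cutoff period in seconds
+    ds.attrs["filter_order"] = 4  # optional, defaults to 4
+    ds.attrs["P_1_med_filt"] = 5  # kernel size in points; must be odd
+
+    ds = stgfilter.apply_butter_filt(ds, "P_1")  # zero-phase lowpass via sosfiltfilt
+    ds = stgfilter.apply_med_filt(ds, "P_1")  # 5-point median smoothing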
+ Aquadopp -------- diff --git a/stglib/__init__.py b/stglib/__init__.py index 6abb3da2..b5124c13 100644 --- a/stglib/__init__.py +++ b/stglib/__init__.py @@ -21,7 +21,7 @@ wxt, ) from .aqd import aqdutils -from .core import cmd, utils, waves +from .core import cmd, filter, qaqc, utils, waves from .core.utils import read_globalatts __version__ = _version.get_versions()["version"] diff --git a/stglib/aqd/aqdutils.py b/stglib/aqd/aqdutils.py index e8444d97..5757e792 100644 --- a/stglib/aqd/aqdutils.py +++ b/stglib/aqd/aqdutils.py @@ -818,19 +818,22 @@ def check_attrs(ds, waves=False, hr=False, inst_type="AQD"): ds.attrs["frequency"] = ds.attrs["SIGHeadFrequency"] ds.attrs["instrument_type"] = ds.attrs["SIGInstrumentName"] - # find bin_size attribute + # find bin_size and sample_rate attributes if ( ds.attrs["data_type"].upper() == "BURST" or ds.attrs["data_type"].upper() == "IBURST" ): ds.attrs["bin_size"] = ds.attrs["SIGBurst_CellSize"] + ds.attrs["sample_rate"] = ds.attrs["SIGBurst_SamplingRate"] elif ( ds.attrs["data_type"].upper() == "BURSTHR" or ds.attrs["data_type"].upper() == "IBURSTHR" ): ds.attrs["bin_size"] = ds.attrs["SIGBurstHR_CellSize"] + ds.attrs["sample_rate"] = ds.attrs["SIGBurst_SamplingRate"] elif ds.attrs["data_type"].upper() == "ECHO1": ds.attrs["bin_size"] = ds.attrs["SIGEchoSounder_CellSize"] + ds.attrs["sample_rate"] = ds.attrs["SIGBurst_SamplingRate"] elif ds.attrs["data_type"].upper() == "AVERAGE": ds.attrs["bin_size"] = ds.attrs["SIGAverage_CellSize"] elif ds.attrs["data_type"].upper() == "ALT_AVERAGE": diff --git a/stglib/aqd/hrcdf2nc.py b/stglib/aqd/hrcdf2nc.py index befc97c9..32a9dc5c 100644 --- a/stglib/aqd/hrcdf2nc.py +++ b/stglib/aqd/hrcdf2nc.py @@ -1,6 +1,6 @@ import xarray as xr -from ..core import qaqc, utils +from ..core import filter, qaqc, utils from . import aqdutils @@ -92,6 +92,10 @@ def cdf_to_nc(cdf_filename, atmpres=False): # should function this for var in VEL.data_vars: + # check if any filtering before other qaqc + VEL = filter.apply_butter_filt(VEL, var) + VEL = filter.apply_med_filt(VEL, var) + VEL = qaqc.trim_min(VEL, var) VEL = qaqc.trim_max(VEL, var) VEL = qaqc.trim_min_diff(VEL, var) diff --git a/stglib/core/filter.py b/stglib/core/filter.py new file mode 100644 index 00000000..e45205df --- /dev/null +++ b/stglib/core/filter.py @@ -0,0 +1,207 @@ +import numpy as np +import scipy.signal as spsig +import xarray as xr + +from . 
import utils
+
+
+def butter_filt(sig, sr, cutfreq, ftype, ford=4):
+    """
+    Butterworth filter using sosfiltfilt in scipy.signal
+
+    Parameters
+    ----------
+    sig - signal to be filtered (array)
+    sr - sample rate of signal (Hz)
+    cutfreq - cutoff frequency for filter (Hz); scalar, or length 2 for bandpass
+    ftype - type of filter, options = ['lowpass', 'highpass', 'bandpass']
+    ford - filter order (default = 4)
+
+    Returns
+    -------
+    signal filtered with a Butterworth filter of the specified order (default = 4)
+
+    """
+    sos = spsig.butter(ford, cutfreq, btype=ftype, fs=sr, output="sos")
+
+    # sosfiltfilt runs the filter forward and backward, so the result is zero-phase
+    return spsig.sosfiltfilt(sos, sig)
+
+
+def make_butter_filt(ds, var, sr, cutfreq, ftype):
+    """
+    Create smoothed data using the specified Butterworth filter type, order, and cutoff for user-specified variables
+
+    Parameters
+    ----------
+    ds - xarray dataset that contains the user-specified variable
+    var - user-specified variable
+    sr - sample rate of data (hertz)
+    cutfreq - cutoff frequency (or frequencies for bandpass) for filter (hertz)
+    ftype - user-specified filter type (lowpass, highpass, or bandpass)
+
+    Returns
+    -------
+    ds - dataset with specified variable smoothed/filtered with the specified Butterworth filter type, order, and cutoff
+
+    """
+    if "filter_order" in ds.attrs:
+        ford = ds.attrs["filter_order"]
+    else:
+        ford = 4
+
+    if ds[var].ndim == 1 and "time" in ds[var].dims:
+        print(f"Applying {ftype} filter to {var}")
+        filtered = butter_filt(ds[var].values, sr, cutfreq, ftype, ford)
+        ds[var][:] = filtered
+
+        notetxt = f"Values filtered using order = {ford} Butterworth {ftype} filter with {cutfreq} Hz cutoff frequency. "
+        ds = utils.insert_note(ds, var, notetxt)
+
+    elif ds[var].ndim == 2 and "time" in ds[var].dims and "sample" in ds[var].dims:
+        print(f"Applying {ftype} filter to {var} burst data")
+        for k in ds["time"]:
+
+            filtered = butter_filt(ds[var].sel(time=k).values, sr, cutfreq, ftype, ford)
+            ds[var].loc[dict(time=k)] = filtered
+
+        notetxt = f"Values filtered using order = {ford} Butterworth {ftype} filter with {cutfreq} Hz cutoff frequency. "
+        ds = utils.insert_note(ds, var, notetxt)
+
+    elif ds[var].ndim == 2 and "time" in ds[var].dims and "z" in ds[var].dims:
+        print(f"Applying {ftype} filter to {var} profile data")
+        for k in ds["z"]:
+
+            filtered = butter_filt(ds[var].sel(z=k).values, sr, cutfreq, ftype, ford)
+            ds[var].loc[dict(z=k)] = filtered
+
+        notetxt = f"Values filtered using order = {ford} Butterworth {ftype} filter with {cutfreq} Hz cutoff frequency. "
+        ds = utils.insert_note(ds, var, notetxt)
+
+    else:
+        print(
+            f"Not able to apply {ftype} filter because only 'time', ('time','sample'), or ('time','z') dimensions are handled and {var} dims are {ds[var].dims}"
+        )
+
+    return ds
+
+
+def apply_butter_filt(ds, var):
+    """
+    Construct and call the Butterworth filter specified in the user's config.yaml file
+
+    Parameters
+    ----------
+    ds - xarray dataset with user-specified variable
+    var - user-specified variable
+
+    Returns
+    -------
+    ds - dataset with specified variable smoothed/filtered with the specified Butterworth filter type, order, and cutoff
+
+    """
+    if (
+        var + "_lowpass_filt" in ds.attrs
+        or var + "_highpass_filt" in ds.attrs
+        or var + "_bandpass_filt" in ds.attrs
+    ):
+
+        if (
+            "sample_rate" in ds.attrs or "sample_interval" in ds.attrs
+        ):  # check to make sure the sample_rate or sample_interval attribute exists.
+ if "sample_rate" in ds.attrs: + sr = ds.attrs["sample_rate"] + else: + sr = 1 / ds.attrs["sample_interval"] + + if var + "_lowpass_filt" in ds.attrs: + ftype = "lowpass" + cutfreq = 1 / ds.attrs[var + "_lowpass_filt"] + ds = make_butter_filt(ds, var, sr, cutfreq, ftype) + + elif var + "_highpass_filt" in ds.attrs: + ftype = "highpass" + cutfreq = 1 / ds.attrs[var + "_highpass_filt"] + ds = make_butter_filt(ds, var, sr, cutfreq, ftype) + + elif var + "_bandpass_filt" in ds.attrs: + ftype = "bandpass" + cutfreq_lo = 1 / ds.attrs[var + "_bandpass_filt"][0] + cutfreq_hi = 1 / ds.attrs[var + "_bandpass_filt"][1] + cutfreq = [cutfreq_lo, cutfreq_hi] + print(cutfreq) + ds = make_butter_filt(ds, var, sr, cutfreq, ftype) + + else: + raise ValueError( + f"sample_rate or sample _interval do not exits in global attributes, can not apply lowpass filter to {var}. " + ) + + return ds + + +def apply_med_filt(ds, var): + """ + Construct and apply N-point median filter to user specified variable and N + + Parameters + ---------- + ds - xarray dataset with user specified variable + var - user specified variable + + Returns + ------- + ds - dataset with user specified variable smoothed/filtered with the user specified N points (kernel size). + """ + if var + "_med_filt" in ds.attrs: + + kernel_size = ds.attrs[var + "_med_filt"] + # make sure kernel_size is odd number + if kernel_size % 2 == 1: + + if ds[var].ndim == 1 and "time" in ds[var].dims: + print(f"Applying {kernel_size} point median filter to {var}") + filtered = spsig.medfilt(ds[var].values, kernel_size) + ds[var][:] = filtered + + notetxt = f"Values filtered using {kernel_size} point median filter. " + ds = utils.insert_note(ds, var, notetxt) + + elif ( + ds[var].ndim == 2 + and "time" in ds[var].dims + and "sample" in ds[var].dims + ): + print(f"Applying {kernel_size} point median filter to {var} burst data") + + for k in ds["time"]: + + filtered = spsig.medfilt(ds[var].sel(time=k).values, kernel_size) + ds[var].loc[dict(time=k)] = filtered + + notetxt = f"Values filtered using {kernel_size} point median filter. " + ds = utils.insert_note(ds, var, notetxt) + + elif ds[var].ndim == 2 and "time" in ds[var].dims and "z" in ds[var].dims: + print( + f"Applying {kernel_size} point median filter to {var} profile data" + ) + + for k in ds["z"]: + + filtered = spsig.medfilt(ds[var].sel(z=k).values, kernel_size) + ds[var].loc[dict(z=k)] = filtered + + notetxt = f"Values filtered using {kernel_size} point median filter. 
" + ds = utils.insert_note(ds, var, notetxt) + + else: + print( + f"Not able to apply median filter because only 'time', ('time','sample'), or ('time', 'z') dimensions are handled and {var} dims are {ds[var].dims}" + ) + + else: + raise ValueError( + f"Not able to apply median filter because kernel size specified {kernel_size} is not an odd whole number" + ) + + return ds diff --git a/stglib/rdi/rdiadcpy.py b/stglib/rdi/rdiadcpy.py index 52954041..aa9115f3 100644 --- a/stglib/rdi/rdiadcpy.py +++ b/stglib/rdi/rdiadcpy.py @@ -24,15 +24,18 @@ # 10/4/2018 remove valid_range as it causes too many downstream problems # 1/25/2017 MM got this running on old Workhorse ADCP data -import sys -import struct +import datetime as dt import math +import struct +import sys + import numpy as np +import pandas as pd +import xarray as xr + # this line works in my local environment, fails in Travis from netCDF4 import Dataset -import datetime as dt -import xarray as xr -import pandas as pd + # from adcpy.EPICstuff.EPICmisc import cftime2EPICtime # from adcpy.EPICstuff.EPICmisc import ajd @@ -60,7 +63,7 @@ def convert_pd0_to_netcdf(pd0File, good_ens, serial_number, time_type, delta_t): maxens, ens_len, ens_data, data_start_posn = analyzepd0file(pd0File, verbose) - infile = open(pd0File, 'rb') + infile = open(pd0File, "rb") infile.seek(data_start_posn) @@ -72,7 +75,7 @@ def convert_pd0_to_netcdf(pd0File, good_ens, serial_number, time_type, delta_t): # cdf, cf_units = setup_netcdf_file(cdfFile, ens_data, ens2process, serial_number, time_type, delta_t) # we want to save the time stamp from this ensemble since it is the # time from which all other times in the file will be relative to - t0 = ens_data['VLeader']['dtobj'] + t0 = ens_data["VLeader"]["dtobj"] netcdf_index = 0 ensemble_count = 0 @@ -85,11 +88,11 @@ def convert_pd0_to_netcdf(pd0File, good_ens, serial_number, time_type, delta_t): bookmark = infile.tell() # save beginning of next ensemble # need to read the header from the file to know the ensemble size header = read_TRDI_header(infile) - if header['sourceID'] != b'\x7f': - print('non-currents ensemble found at %d' % bookmark) + if header["sourceID"] != b"\x7f": + print("non-currents ensemble found at %d" % bookmark) - if ens_len != header['nbytesperens']+2: - ens_len = header['nbytesperens']+2 # update to what we have + if ens_len != header["nbytesperens"] + 2: + ens_len = header["nbytesperens"] + 2 # update to what we have # go back to where this ensemble started before we checked the header infile.seek(bookmark) @@ -97,17 +100,41 @@ def convert_pd0_to_netcdf(pd0File, good_ens, serial_number, time_type, delta_t): ens_error = None alldata = {} for n in range(nslantbeams): - for var in ['vel', 'cor', 'att', 'PGd']: + for var in ["vel", "cor", "att", "PGd"]: alldata[f"{var}{n+1}"] = [] - for var in ['Rec', 'time', 'sv', 'Hdg', 'Ptch', 'Roll', 'HdgSTD', 'PtchSTD', 'RollSTD', 'Tx', 'S', 'xmitc', 'xmitv', 'Ambient_Temp', 'Pressure+', 'Pressure-', 'Attitude_Temp', 'EWD1', 'EWD2', 'EWD3', 'EWD4']: + for var in [ + "Rec", + "time", + "sv", + "Hdg", + "Ptch", + "Roll", + "HdgSTD", + "PtchSTD", + "RollSTD", + "Tx", + "S", + "xmitc", + "xmitv", + "Ambient_Temp", + "Pressure+", + "Pressure-", + "Attitude_Temp", + "EWD1", + "EWD2", + "EWD3", + "EWD4", + ]: alldata[var] = [] - alldata['FLeader'] = ens_data['FLeader'] + alldata["FLeader"] = ens_data["FLeader"] bindist = [] - for idx in range(ens_data['FLeader']['Number_of_Cells']): - bindist.append(idx * (ens_data['FLeader']['Depth_Cell_Length_cm'] / 100) + - 
ens_data['FLeader']['Bin_1_distance_cm'] / 100) - alldata['bindist'] = bindist + for idx in range(ens_data["FLeader"]["Number_of_Cells"]): + bindist.append( + idx * (ens_data["FLeader"]["Depth_Cell_Length_cm"] / 100) + + ens_data["FLeader"]["Bin_1_distance_cm"] / 100 + ) + alldata["bindist"] = bindist while len(ens) > 0: # print('-- ensemble %d length %g, file position %g' % (ensemble_count, len(ens), infile.tell())) @@ -117,154 +144,186 @@ def convert_pd0_to_netcdf(pd0File, good_ens, serial_number, time_type, delta_t): if (ens_error is None) and (ensemble_count >= ens2process[0]): # write to netCDF if netcdf_index == 0: - print('--- first ensembles read at %s and TRDI #%d' % ( - ens_data['VLeader']['timestr'], ens_data['VLeader']['Ensemble_Number'])) + print( + "--- first ensembles read at %s and TRDI #%d" + % ( + ens_data["VLeader"]["timestr"], + ens_data["VLeader"]["Ensemble_Number"], + ) + ) # varobj = cdf.variables['Rec'] try: - tmp = ens_data['VLeader']['Ensemble_Number'] + tmp = ens_data["VLeader"]["Ensemble_Number"] except: # here we have reached the end of the netCDF file cdf.close() infile.close() return - elapsed = ens_data['VLeader']['dtobj']-t0 # timedelta + elapsed = ens_data["VLeader"]["dtobj"] - t0 # timedelta elapsed_sec = elapsed.total_seconds() - alldata['time'].append(dt.datetime(ens_data['VLeader']['Year'], ens_data['VLeader']['Month'], ens_data['VLeader']['Day'], ens_data['VLeader']['Hour'], ens_data['VLeader']['Minute'], ens_data['VLeader']['Second'], int(ens_data['VLeader']['Hundredths'] * 1e4))) # datetime wants it in microseconds + alldata["time"].append( + dt.datetime( + ens_data["VLeader"]["Year"], + ens_data["VLeader"]["Month"], + ens_data["VLeader"]["Day"], + ens_data["VLeader"]["Hour"], + ens_data["VLeader"]["Minute"], + ens_data["VLeader"]["Second"], + int(ens_data["VLeader"]["Hundredths"] * 1e4), + ) + ) # datetime wants it in microseconds # diagnostic - if (ens2process[1]-ens2process[0]-1) < 100: - print('%d %15.8f %s' % (ens_data['VLeader']['Ensemble_Number'], - ens_data['VLeader']['julian_day_from_julian'], - ens_data['VLeader']['timestr'])) - - - alldata['sv'].append(ens_data['VLeader']['Speed_of_Sound']) + if (ens2process[1] - ens2process[0] - 1) < 100: + print( + "%d %15.8f %s" + % ( + ens_data["VLeader"]["Ensemble_Number"], + ens_data["VLeader"]["julian_day_from_julian"], + ens_data["VLeader"]["timestr"], + ) + ) + + alldata["sv"].append(ens_data["VLeader"]["Speed_of_Sound"]) for i in range(nslantbeams): - varname = "vel%d" % (i+1) - alldata[varname].append(ens_data['VData'][i, :]) + varname = "vel%d" % (i + 1) + alldata[varname].append(ens_data["VData"][i, :]) for i in range(nslantbeams): - varname = "cor%d" % (i+1) - alldata[varname].append(ens_data['CData'][i, :]) + varname = "cor%d" % (i + 1) + alldata[varname].append(ens_data["CData"][i, :]) for i in range(nslantbeams): - varname = "att%d" % (i+1) - alldata[varname].append(ens_data['IData'][i, :]) + varname = "att%d" % (i + 1) + alldata[varname].append(ens_data["IData"][i, :]) - if 'GData' in ens_data: + if "GData" in ens_data: for i in range(nslantbeams): - varname = "PGd%d" % (i+1) - alldata[varname].append(ens_data['GData'][i, :]) - - alldata['Rec'].append(ens_data['VLeader']['Ensemble_Number']) - alldata['Hdg'].append(ens_data['VLeader']['Heading']) - alldata['Ptch'].append(ens_data['VLeader']['Pitch']) - alldata['Roll'].append(ens_data['VLeader']['Roll']) - alldata['HdgSTD'].append(ens_data['VLeader']['H/Hdg_Std_Dev']) - alldata['PtchSTD'].append(ens_data['VLeader']['P/Pitch_Std_Dev']) - 
alldata['RollSTD'].append(ens_data['VLeader']['R/Roll_Std_Dev']) - alldata['Tx'].append(ens_data['VLeader']['Temperature']) - alldata['S'].append(ens_data['VLeader']['Salinity']) - alldata['xmitc'].append(ens_data['VLeader']['Xmit_Current']) - alldata['xmitv'].append(ens_data['VLeader']['Xmit_Voltage']) - alldata['Ambient_Temp'].append(ens_data['VLeader']['Ambient_Temp']) - alldata['Pressure+'].append(ens_data['VLeader']['Pressure_(+)']) - alldata['Pressure-'].append(ens_data['VLeader']['Pressure_(-)']) - alldata['Attitude_Temp'].append(ens_data['VLeader']['Attitude_Temp']) - alldata['EWD1'].append(int(ens_data['VLeader']['Error_Status_Word_Low_16_bits_LSB'])) - alldata['EWD2'].append(int(ens_data['VLeader']['Error_Status_Word_Low_16_bits_MSB'])) - alldata['EWD3'].append(int(ens_data['VLeader']['Error_Status_Word_High_16_bits_LSB'])) - alldata['EWD4'].append(int(ens_data['VLeader']['Error_Status_Word_High_16_bits_MSB'])) - - if ens_data['FLeader']['Depth_sensor_available'] == 'Yes': - if 'Pressure' not in alldata: - alldata['Pressure'] = [] - alldata['PressVar'] = [] - alldata['Pressure'].append(ens_data['VLeader']['Pressure_deca-pascals']) - alldata['PressVar'].append(ens_data['VLeader']['Pressure_variance_deca-pascals']) + varname = "PGd%d" % (i + 1) + alldata[varname].append(ens_data["GData"][i, :]) + + alldata["Rec"].append(ens_data["VLeader"]["Ensemble_Number"]) + alldata["Hdg"].append(ens_data["VLeader"]["Heading"]) + alldata["Ptch"].append(ens_data["VLeader"]["Pitch"]) + alldata["Roll"].append(ens_data["VLeader"]["Roll"]) + alldata["HdgSTD"].append(ens_data["VLeader"]["H/Hdg_Std_Dev"]) + alldata["PtchSTD"].append(ens_data["VLeader"]["P/Pitch_Std_Dev"]) + alldata["RollSTD"].append(ens_data["VLeader"]["R/Roll_Std_Dev"]) + alldata["Tx"].append(ens_data["VLeader"]["Temperature"]) + alldata["S"].append(ens_data["VLeader"]["Salinity"]) + alldata["xmitc"].append(ens_data["VLeader"]["Xmit_Current"]) + alldata["xmitv"].append(ens_data["VLeader"]["Xmit_Voltage"]) + alldata["Ambient_Temp"].append(ens_data["VLeader"]["Ambient_Temp"]) + alldata["Pressure+"].append(ens_data["VLeader"]["Pressure_(+)"]) + alldata["Pressure-"].append(ens_data["VLeader"]["Pressure_(-)"]) + alldata["Attitude_Temp"].append(ens_data["VLeader"]["Attitude_Temp"]) + alldata["EWD1"].append( + int(ens_data["VLeader"]["Error_Status_Word_Low_16_bits_LSB"]) + ) + alldata["EWD2"].append( + int(ens_data["VLeader"]["Error_Status_Word_Low_16_bits_MSB"]) + ) + alldata["EWD3"].append( + int(ens_data["VLeader"]["Error_Status_Word_High_16_bits_LSB"]) + ) + alldata["EWD4"].append( + int(ens_data["VLeader"]["Error_Status_Word_High_16_bits_MSB"]) + ) + + if ens_data["FLeader"]["Depth_sensor_available"] == "Yes": + if "Pressure" not in alldata: + alldata["Pressure"] = [] + alldata["PressVar"] = [] + alldata["Pressure"].append(ens_data["VLeader"]["Pressure_deca-pascals"]) + alldata["PressVar"].append( + ens_data["VLeader"]["Pressure_variance_deca-pascals"] + ) # add bottom track data write to cdf here - if 'BTData' in ens_data: - if ens_data['BTData']['Mode'] == 0: - varobj = cdf.variables['BTRmin'] - varobj[netcdf_index] = ens_data['BTData']['Ref_Layer_Min'] - varobj = cdf.variables['BTRnear'] - varobj[netcdf_index] = ens_data['BTData']['Ref_Layer_Near'] - varobj = cdf.variables['BTRfar'] - varobj[netcdf_index] = ens_data['BTData']['Ref_Layer_Far'] - - varnames = ('BTWe', 'BTWu', 'BTWv', 'BTWd') + if "BTData" in ens_data: + if ens_data["BTData"]["Mode"] == 0: + varobj = cdf.variables["BTRmin"] + varobj[netcdf_index] = 
ens_data["BTData"]["Ref_Layer_Min"] + varobj = cdf.variables["BTRnear"] + varobj[netcdf_index] = ens_data["BTData"]["Ref_Layer_Near"] + varobj = cdf.variables["BTRfar"] + varobj[netcdf_index] = ens_data["BTData"]["Ref_Layer_Far"] + + varnames = ("BTWe", "BTWu", "BTWv", "BTWd") for i in range(nslantbeams): - varname = "BTR%d" % (i+1) + varname = "BTR%d" % (i + 1) varobj = cdf.variables[varname] - varobj[netcdf_index] = ens_data['BTData']['BT_Range'][i] - if ens_data['FLeader']['Coord_Transform'] == 'EARTH': + varobj[netcdf_index] = ens_data["BTData"]["BT_Range"][i] + if ens_data["FLeader"]["Coord_Transform"] == "EARTH": varobj = cdf.variables[varnames[i]] else: - varname = "BTV%d" % (i+1) + varname = "BTV%d" % (i + 1) varobj = cdf.variables[varname] - varobj[netcdf_index] = ens_data['BTData']['BT_Vel'][i] - varname = "BTc%d" % (i+1) + varobj[netcdf_index] = ens_data["BTData"]["BT_Vel"][i] + varname = "BTc%d" % (i + 1) varobj = cdf.variables[varname] - varobj[netcdf_index] = ens_data['BTData']['BT_Corr'][i] - varname = "BTe%d" % (i+1) + varobj[netcdf_index] = ens_data["BTData"]["BT_Corr"][i] + varname = "BTe%d" % (i + 1) varobj = cdf.variables[varname] - varobj[netcdf_index] = ens_data['BTData']['BT_Amp'][i] - varname = "BTp%d" % (i+1) + varobj[netcdf_index] = ens_data["BTData"]["BT_Amp"][i] + varname = "BTp%d" % (i + 1) varobj = cdf.variables[varname] - varobj[netcdf_index] = ens_data['BTData']['BT_PGd'][i] - varname = "BTRSSI%d" % (i+1) + varobj[netcdf_index] = ens_data["BTData"]["BT_PGd"][i] + varname = "BTRSSI%d" % (i + 1) varobj = cdf.variables[varname] - varobj[netcdf_index] = ens_data['BTData']['RSSI_Amp'][i] + varobj[netcdf_index] = ens_data["BTData"]["RSSI_Amp"][i] - if ens_data['BTData']['Mode'] == 0: - varobj[netcdf_index] = ens_data['BTData']['Ref_Layer_Vel'][i] - varname = "BTRc%d" % (i+1) + if ens_data["BTData"]["Mode"] == 0: + varobj[netcdf_index] = ens_data["BTData"]["Ref_Layer_Vel"][i] + varname = "BTRc%d" % (i + 1) varobj = cdf.variables[varname] - varobj[netcdf_index] = ens_data['BTData']['Ref_Layer_Corr'][i] - varname = "BTRi%d" % (i+1) + varobj[netcdf_index] = ens_data["BTData"]["Ref_Layer_Corr"][i] + varname = "BTRi%d" % (i + 1) varobj = cdf.variables[varname] - varobj[netcdf_index] = ens_data['BTData']['Ref_Layer_Amp'][i] - varname = "BTRp%d" % (i+1) + varobj[netcdf_index] = ens_data["BTData"]["Ref_Layer_Amp"][i] + varname = "BTRp%d" % (i + 1) varobj = cdf.variables[varname] - varobj[netcdf_index] = ens_data['BTData']['Ref_Layer_PGd'][i] - - if 'VBeamVData' in ens_data: - if ens_data['VBeamLeader']['Vertical_Depth_Cells'] == ens_data['FLeader']['Number_of_Cells']: - if 'vel5' not in alldata: - alldata['vel5'] = [] - alldata['cor5'] = [] - alldata['att5'] = [] - alldata['vel5'].append(ens_data['VBeamVData']) - alldata['cor5'].append(ens_data['VBeamCData']) - alldata['att5'].append(ens_data['VBeamIData']) - if 'VBeamGData' in ens_data: - if 'PGd5' not in alldata: - alldata['PGd5'] = [] - alldata['PGd5'].append(ens_data['VBeamGData']) - - if 'WaveParams' in ens_data: + varobj[netcdf_index] = ens_data["BTData"]["Ref_Layer_PGd"][i] + + if "VBeamVData" in ens_data: + if ( + ens_data["VBeamLeader"]["Vertical_Depth_Cells"] + == ens_data["FLeader"]["Number_of_Cells"] + ): + if "vel5" not in alldata: + alldata["vel5"] = [] + alldata["cor5"] = [] + alldata["att5"] = [] + alldata["vel5"].append(ens_data["VBeamVData"]) + alldata["cor5"].append(ens_data["VBeamCData"]) + alldata["att5"].append(ens_data["VBeamIData"]) + if "VBeamGData" in ens_data: + if "PGd5" not in alldata: + 
alldata["PGd5"] = [] + alldata["PGd5"].append(ens_data["VBeamGData"]) + + if "WaveParams" in ens_data: # we can get away with this because the key names and var names are the same - for key in ens_data['WaveParams']: + for key in ens_data["WaveParams"]: if key not in alldata: alldata[key] = [] - alldata[key].append(ens_data['WaveParams'][key]) + alldata[key].append(ens_data["WaveParams"][key]) - if 'WaveSeaSwell' in ens_data: + if "WaveSeaSwell" in ens_data: # we can get away with this because the key names and var names are the same - for key in ens_data['WaveSeaSwell']: + for key in ens_data["WaveSeaSwell"]: if key not in alldata: alldata[key] = [] - alldata[key].append(ens_data['WaveSeaSwell'][key]) + alldata[key].append(ens_data["WaveSeaSwell"][key]) netcdf_index += 1 - elif ens_error == 'no ID': - print('Stopping because ID tracking lost') + elif ens_error == "no ID": + print("Stopping because ID tracking lost") infile.close() # cdf.close() sys.exit(1) @@ -272,18 +331,24 @@ def convert_pd0_to_netcdf(pd0File, good_ens, serial_number, time_type, delta_t): ensemble_count += 1 if ensemble_count > maxens: - print('stopping at estimated end of file ensemble %d' % ens2process[1]) + print("stopping at estimated end of file ensemble %d" % ens2process[1]) break n = 10000 - ensf, ensi = math.modf(ensemble_count/n) + ensf, ensi = math.modf(ensemble_count / n) if ensf == 0: - print('%d ensembles read at %s and TRDI #%d' % (ensemble_count, ens_data['VLeader']['dtobj'], - ens_data['VLeader']['Ensemble_Number'])) - - if ensemble_count >= ens2process[1]-1: - print('stopping at requested ensemble %d' % ens2process[1]) + print( + "%d ensembles read at %s and TRDI #%d" + % ( + ensemble_count, + ens_data["VLeader"]["dtobj"], + ens_data["VLeader"]["Ensemble_Number"], + ) + ) + + if ensemble_count >= ens2process[1] - 1: + print("stopping at requested ensemble %d" % ens2process[1]) break # note that ensemble lengths can change in the middle of the file! 
@@ -297,14 +362,14 @@ def convert_pd0_to_netcdf(pd0File, good_ens, serial_number, time_type, delta_t): if header is None: # we presume this is the end of the file, since we don't have header info - print('end of file reached with incomplete header') + print("end of file reached with incomplete header") break - if header['sourceID'] != b'\x7f': - print('non-currents ensemble found at %d' % bookmark) + if header["sourceID"] != b"\x7f": + print("non-currents ensemble found at %d" % bookmark) - if ens_len != header['nbytesperens']+2: - ens_len = header['nbytesperens']+2 # update to what we have + if ens_len != header["nbytesperens"] + 2: + ens_len = header["nbytesperens"] + 2 # update to what we have # TODO - fix this so that we aren't going back and forth, it is really slow # go back to where this ensemble started before we checked the header @@ -312,20 +377,26 @@ def convert_pd0_to_netcdf(pd0File, good_ens, serial_number, time_type, delta_t): ens = infile.read(ens_len) else: # while len(ens) > 0: - print('end of file reached') + print("end of file reached") if ensemble_count < maxens: - print('end of file reached after %d ensembles, less than estimated in the file' % ensemble_count) + print( + "end of file reached after %d ensembles, less than estimated in the file" + % ensemble_count + ) elif ensemble_count > maxens: - print('end of file reached after %d ensembles, more than estimated in the file' % ensemble_count) + print( + "end of file reached after %d ensembles, more than estimated in the file" + % ensemble_count + ) infile.close() # cdf.close() - print('%d ensembles read, %d records written' % (ensemble_count, netcdf_index)) + print("%d ensembles read, %d records written" % (ensemble_count, netcdf_index)) for k in alldata.keys(): - if k!= 'FLeader': + if k != "FLeader": alldata[k] = np.array(alldata[k]) return ensemble_count, netcdf_index, ens_error, alldata @@ -370,7 +441,7 @@ def write_dict_to_cdf_attributes(netcdf_object, d, tag): try: netcdf_object.setncattr(newkey, d[key]) except: - print('can\'t set %s attribute' % key) + print("can't set %s attribute" % key) return d @@ -385,116 +456,126 @@ def parse_TRDI_ensemble(ensbytes, verbose): """ ens_data = {} ens_error = None - ens_data['Header'] = parse_TRDI_header(ensbytes) + ens_data["Header"] = parse_TRDI_header(ensbytes) - for i in range(ens_data['Header']['ndatatypes']): + for i in range(ens_data["Header"]["ndatatypes"]): # go to each offset and parse depending on what we find - offset = ens_data['Header']['offsets'][i] + offset = ens_data["Header"]["offsets"][i] # raw, val = __parseTRDIushort(ensbytes, offset) - val = struct.unpack(' 2: mo -= 3 yr = year - c = math.floor(yr/100) - yr = yr - c*100 + c = math.floor(yr / 100) + yr = yr - c * 100 d = day - j = math.floor((146097*c)/4)+math.floor((1461*yr)/4) + \ - math.floor((153*mo + 2)/5)+d+1721119 + j = ( + math.floor((146097 * c) / 4) + + math.floor((1461 * yr) / 4) + + math.floor((153 * mo + 2) / 5) + + d + + 1721119 + ) # If you want julian days to start and end at noon, # replace the following line with: # j=j+(decimalhrs-12)/24; - j = j+decimalhrs/24 + j = j + decimalhrs / 24 return j @@ -1688,34 +2007,34 @@ def analyzepd0file(pd0file, verbose=False): :return: number of ensembles in file, number of bytes in each ensemble, data from the first ensemble, number of bytes to the start of the data """ - infile = open(pd0file, 'rb') + infile = open(pd0file, "rb") while infile.tell() < 3000: b1 = infile.read(1) - if b1 == b'\x7f': + if b1 == b"\x7f": b2 = infile.read(1) - if b2 == 
b'\x7f': + if b2 == b"\x7f": break else: - print('Desired TRDI 7f7f ID not found within 3 kB from beginning of the file') + print("Desired TRDI 7f7f ID not found within 3 kB from beginning of the file") infile.close() sys.exit(1) - start_of_data = infile.tell()-2 + start_of_data = infile.tell() - 2 if start_of_data != 0: - print('data starts %d bytes into the file' % start_of_data) + print("data starts %d bytes into the file" % start_of_data) infile.seek(start_of_data) # need to read the header from the file to know the ensemble size header = read_TRDI_header(infile) - if header['sourceID'] != b'\x7f': - print('error - this is not a currents file') + if header["sourceID"] != b"\x7f": + print("error - this is not a currents file") infile.close() # number of bytes per ensemble in the header does not include the checksum - ens_len = header['nbytesperens']+2 - print('ensemble length = %g' % ens_len) + ens_len = header["nbytesperens"] + 2 + print("ensemble length = %g" % ens_len) print(header) # it is faster to define the netCDF file with a known length # for this we need to estimate how many ensembles we will be reading @@ -1736,21 +2055,26 @@ def analyzepd0file(pd0file, verbose=False): for i in range(nens2check): fileposn = infile.tell() header = read_TRDI_header(infile) - ens_len = header['nbytesperens']+2 + ens_len = header["nbytesperens"] + 2 infile.seek(fileposn) ens_data, ens_error = parse_TRDI_ensemble(infile.read(ens_len), verbose) if ens_error is not None: - print('problem reading the first ensemble: ' + ens_error) + print("problem reading the first ensemble: " + ens_error) # infile.close() # sys.exit(1) if i == 0: first_ens_data = ens_data - print('ensemble %d has %d bytes and %d datatypes' % (ens_data['VLeader']['Ensemble_Number'], - ens_data['Header']['nbytesperens'], - ens_data['Header']['ndatatypes'])) - nbytesperens[i] = ens_data['Header']['nbytesperens']+2 - ndatatypes[i] = ens_data['Header']['ndatatypes'] + print( + "ensemble %d has %d bytes and %d datatypes" + % ( + ens_data["VLeader"]["Ensemble_Number"], + ens_data["Header"]["nbytesperens"], + ens_data["Header"]["ndatatypes"], + ) + ) + nbytesperens[i] = ens_data["Header"]["nbytesperens"] + 2 + ndatatypes[i] = ens_data["Header"]["ndatatypes"] # the guess here is that if the first two ensembles are not the same, # it's the second ensemble that is representative of the data @@ -1761,14 +2085,16 @@ def analyzepd0file(pd0file, verbose=False): infile.seek(0, 2) nbytesinfile = infile.tell() - max_ens = (nbytesinfile/ens_len)-1 - print('estimating %g ensembles in file using a %d ensemble size' % (max_ens, ens_len)) + max_ens = (nbytesinfile / ens_len) - 1 + print( + "estimating %g ensembles in file using a %d ensemble size" % (max_ens, ens_len) + ) infile.close() - print(ens_data['Header']) - print('ensemble length = %g' % ens_len) - print('estimating %g ensembles in file' % max_ens) + print(ens_data["Header"]) + print("ensemble length = %g" % ens_len) + print("estimating %g ensembles in file" % max_ens) # return max_ens, ens_len, ens_data, start_of_data return max_ens, ens_len, first_ens_data, start_of_data diff --git a/stglib/rsk/cdf2nc.py b/stglib/rsk/cdf2nc.py index e0672512..34697102 100755 --- a/stglib/rsk/cdf2nc.py +++ b/stglib/rsk/cdf2nc.py @@ -1,7 +1,7 @@ import numpy as np import xarray as xr -from ..core import qaqc, utils +from ..core import filter, qaqc, utils def cdf_to_nc(cdf_filename, atmpres=None, writefile=True, format="NETCDF4"): @@ -50,6 +50,10 @@ def cdf_to_nc(cdf_filename, atmpres=None, writefile=True, 
format="NETCDF4"): # trim by minimum pressure for instruments that go out of water_depth for v in ["P_1", "P_1ac"]: + # check if any filtering before other qaqc + ds = filter.apply_butter_filt(ds, v) + ds = filter.apply_med_filt(ds, v) + ds = trim_min(ds, v) ds = qaqc.trim_bad_ens(ds, v) diff --git a/stglib/sg.py b/stglib/sg.py index 62f04c81..1e31d8d1 100644 --- a/stglib/sg.py +++ b/stglib/sg.py @@ -1,7 +1,7 @@ import pandas as pd import xarray as xr -from .core import qaqc, utils +from .core import filter, qaqc, utils def read_tid(filnam, encoding="utf-8"): @@ -161,6 +161,10 @@ def sg_qaqc(ds): [varlist.append(k) for k in ds.data_vars if k not in varlist] for var in varlist: + # check if any filtering before other qaqc + ds = filter.apply_butter_filt(ds, var) + ds = filter.apply_med_filt(ds, var) + ds = qaqc.trim_min(ds, var) ds = qaqc.trim_max(ds, var) diff --git a/stglib/sig/cdf2nc.py b/stglib/sig/cdf2nc.py index 5a964972..afce2198 100644 --- a/stglib/sig/cdf2nc.py +++ b/stglib/sig/cdf2nc.py @@ -6,7 +6,7 @@ from dask.diagnostics import ProgressBar from ..aqd import aqdutils -from ..core import utils +from ..core import filter, qaqc, utils # import os @@ -132,6 +132,39 @@ def cdf_to_nc(cdf_filename, atmpres=False): # Add min/max values ds = utils.add_min_max(ds) + # qaqc + for var in ds.data_vars: + # need to do this or else a "coordinates" attribute with value of "burst" hangs around + # ds[var].encoding["coordinates"] = None + + # check for any filtering first + ds = filter.apply_butter_filt(ds, var) + ds = filter.apply_med_filt(ds, var) + + ds = qaqc.trim_min(ds, var) + ds = qaqc.trim_max(ds, var) + ds = qaqc.trim_min_diff(ds, var) + ds = qaqc.trim_min_diff_pct(ds, var) + ds = qaqc.trim_max_diff(ds, var) + ds = qaqc.trim_maxabs_diff_2d(ds, var) + ds = qaqc.trim_max_diff_pct(ds, var) + ds = qaqc.trim_med_diff(ds, var) + ds = qaqc.trim_med_diff_pct(ds, var) + ds = qaqc.trim_max_blip(ds, var) + ds = qaqc.trim_max_blip_pct(ds, var) + ds = qaqc.trim_bad_ens(ds, var) + ds = qaqc.trim_bad_ens_indiv(ds, var) + ds = qaqc.trim_fliers(ds, var) + ds = qaqc.trim_warmup(ds, var) + + # after check for masking vars by other vars + for var in ds.data_vars: + ds = qaqc.trim_mask(ds, var) + + # fill with AGC and Cor threshold + ds = aqdutils.fill_agc(ds) + ds = aqdutils.fill_cor(ds) + # write out nc file by data_type if "prefix" in ds.attrs: nc_filename = ds.attrs["prefix"] + ds.attrs["filename"] diff --git a/stglib/tests/data/config_1126vec14823.yaml b/stglib/tests/data/config_1126vec14823.yaml index b2cab9f8..a01b580b 100644 --- a/stglib/tests/data/config_1126vec14823.yaml +++ b/stglib/tests/data/config_1126vec14823.yaml @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52b9b6e482e46b63481c5a222bf86fd4d0f8280b507db5a48fe53f7eeaabd629 -size 587 +oid sha256:5033cd287fcfd4496e1592259cb1483b24df9e8bee87ecf5ae6f924536d7d8f1 +size 737 diff --git a/stglib/tests/data/sig1126_config.yaml b/stglib/tests/data/sig1126_config.yaml index 88aa2557..0a45e80d 100644 --- a/stglib/tests/data/sig1126_config.yaml +++ b/stglib/tests/data/sig1126_config.yaml @@ -10,6 +10,10 @@ initial_instrument_height_note: 'From seabed' zeroed_pressure: 'Yes' # was pressure zeroed before deployment trim_method: 'water level sl' # Water Level SL trims bin if any part of bin or side lobe is out of water - works best when pressure is corrected for atmospheric chunks: ['time', 25000, 'bindist', 4] +u_1205_highpass_filt: 50 #cutoff in seconds +v_1206_lowpass_filt: 2 #cutoff in seconds +w_1204_bandpass_filt: [50, 2] 
#cutoffs in seconds [long_period (low_freq), short_period(hi_freq)] +P_1_med_filt: 9 #kernel_size/points #P_1ac_note: 'Corrected for variations in atmospheric pressure using MesoWest Station FW0901 Wellfleet.' #u_1205_bad_ens: [2019-01-22 09:01, 2019-01-22 13:16, 2019-01-22 21:45, 2019-01-23 02:16,2019-01-23 09:16, 2019-01-23 13:31,2019-02-01 09:01, 2019-02-01 18:16, 2019-02-02 21:31, 2019-02-02 22:16, 2019-02-03 08:01, 2019-02-03 10:46, 2019-02-03 20:01, 2019-02-04 00:31] #v_1206_bad_ens: [2019-01-22 09:01, 2019-01-22 13:16, 2019-01-22 21:45, 2019-01-23 02:16,2019-01-23 09:16, 2019-01-23 13:31,2019-02-01 09:01, 2019-02-01 18:16, 2019-02-02 21:31, 2019-02-02 22:16, 2019-02-03 08:01, 2019-02-03 10:46, 2019-02-03 20:01, 2019-02-04 00:31] diff --git a/stglib/troll.py b/stglib/troll.py index b2bef286..c034c0b5 100644 --- a/stglib/troll.py +++ b/stglib/troll.py @@ -399,13 +399,7 @@ def compute_density(T, S): - 1.120083e-6 * T**4 + 6.536332e-9 * T**5 ) - a = ( - 0.824493 - - 0.004089 * T - + 7.6438e-5 * T**2 - - 8.2467e-7 * T**3 - + 5.3875e-9 * T**4 - ) + a = 0.824493 - 0.004089 * T + 7.6438e-5 * T**2 - 8.2467e-7 * T**3 + 5.3875e-9 * T**4 b = -0.00572466 + 1.0227e-4 * T - 1.6546e-6 * T**2 c = 0.000483140 diff --git a/stglib/vec/cdf2nc.py b/stglib/vec/cdf2nc.py index 37976f87..14987ef5 100644 --- a/stglib/vec/cdf2nc.py +++ b/stglib/vec/cdf2nc.py @@ -7,7 +7,7 @@ from tqdm import tqdm from ..aqd import aqdutils -from ..core import qaqc, transform, utils +from ..core import filter, qaqc, transform, utils def cdf_to_nc(cdf_filename, atmpres=False): @@ -60,6 +60,11 @@ def cdf_to_nc(cdf_filename, atmpres=False): for var in ds.data_vars: # need to do this or else a "coordinates" attribute with value of "burst" hangs around ds[var].encoding["coordinates"] = None + + # check if any filtering before other qaqc + ds = filter.apply_butter_filt(ds, var) + ds = filter.apply_med_filt(ds, var) + ds = qaqc.trim_min(ds, var) ds = qaqc.trim_max(ds, var) ds = qaqc.trim_maxabs_diff(ds, var)
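The instrument readers above all follow the same ordering: configured filters run before the qaqc trims, so the min/max and difference thresholds operate on the smoothed series. A small sketch of the burst (two-dimensional) path through apply_med_filt, with hypothetical variable names and sizes::

    import numpy as np
    import pandas as pd
    import xarray as xr

    from stglib.core import filter as stgfilter

    # hypothetical burst record: 10 bursts of 512 samples each
    time = pd.date_range("2024-10-11", periods=10, freq="h")
    sample = np.arange(512)
    ds = xr.Dataset(
        {"u_1205": (("time", "sample"), np.random.randn(10, 512))},
        coords={"time": time, "sample": sample},
    )
    ds.attrs["u_1205_med_filt"] = 5  # kernel size; an even value raises ValueError

    # apply_med_filt loops over each burst and median-filters it in place,
    # then records a note on the variable via utils.insert_note
    ds = stgfilter.apply_med_filt(ds, "u_1205")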