Skip to content

Commit

Permalink
Add filtering/smoothing options (#261)
Browse files Browse the repository at this point in the history
* add signal filtering option

* add filters updates and tests

* add filters clean-up

* filter order options

* fix code comments

---------

Co-authored-by: ssuttles-usgs <ssuttles@usgs.gov>
  • Loading branch information
ssuttles-usgs and ssuttles-usgs authored Oct 11, 2024
1 parent c7d760c commit fa7e4a0
Show file tree
Hide file tree
Showing 13 changed files with 1,317 additions and 725 deletions.
8 changes: 8 additions & 0 deletions doc/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@ Options applicable to many instrument types include:
- ``<VAR>_mask``: a single variable or list of variables which should be used to fill the given variable. For example ``u_1205_mask: ["cor1_1285", "cor2_1286", "cor3_1287"]`` will set ``u_1205`` to ``_FillValue`` wherever the correlation variables are ``_FillValue``
- ``drop_vars``: a list of variables to be removed from the final file. For example, ``drop_vars: ['nLF_Cond_µS_per_cm', 'Wiper_Position_volt', 'Cable_Pwr_V']``.

Options for signal filtering:

- ``<VAR>_lowpass_filt``: apply a Butterworth lowpass filter to ``<VAR>`` with the specified cutoff period in seconds.
- ``<VAR>_highpass_filt``: apply a Butterworth highpass filter to ``<VAR>`` with the specified cutoff period in seconds.
- ``<VAR>_bandpass_filt``: apply a Butterworth bandpass filter to ``<VAR>`` with the cutoff periods in seconds given as a two-element list ``[cut_long, cut_short]``.
- ``<VAR>_med_filt``: apply an n-point median filter to ``<VAR>``, where n is the specified value (must be an odd number).
- ``filter_order``: order of the Butterworth filter (default is 4 if not specified).

Aquadopp
--------

Expand Down
2 changes: 1 addition & 1 deletion stglib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
wxt,
)
from .aqd import aqdutils
from .core import cmd, utils, waves
from .core import cmd, filter, qaqc, utils, waves
from .core.utils import read_globalatts

__version__ = _version.get_versions()["version"]
5 changes: 4 additions & 1 deletion stglib/aqd/aqdutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,19 +818,22 @@ def check_attrs(ds, waves=False, hr=False, inst_type="AQD"):
ds.attrs["frequency"] = ds.attrs["SIGHeadFrequency"]
ds.attrs["instrument_type"] = ds.attrs["SIGInstrumentName"]

# find bin_size attribute
# find bin_size and sample_rate attributes
if (
ds.attrs["data_type"].upper() == "BURST"
or ds.attrs["data_type"].upper() == "IBURST"
):
ds.attrs["bin_size"] = ds.attrs["SIGBurst_CellSize"]
ds.attrs["sample_rate"] = ds.attrs["SIGBurst_SamplingRate"]
elif (
ds.attrs["data_type"].upper() == "BURSTHR"
or ds.attrs["data_type"].upper() == "IBURSTHR"
):
ds.attrs["bin_size"] = ds.attrs["SIGBurstHR_CellSize"]
ds.attrs["sample_rate"] = ds.attrs["SIGBurst_SamplingRate"]
elif ds.attrs["data_type"].upper() == "ECHO1":
ds.attrs["bin_size"] = ds.attrs["SIGEchoSounder_CellSize"]
ds.attrs["sample_rate"] = ds.attrs["SIGBurst_SamplingRate"]
elif ds.attrs["data_type"].upper() == "AVERAGE":
ds.attrs["bin_size"] = ds.attrs["SIGAverage_CellSize"]
elif ds.attrs["data_type"].upper() == "ALT_AVERAGE":
Expand Down
6 changes: 5 additions & 1 deletion stglib/aqd/hrcdf2nc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import xarray as xr

from ..core import qaqc, utils
from ..core import filter, qaqc, utils
from . import aqdutils


Expand Down Expand Up @@ -92,6 +92,10 @@ def cdf_to_nc(cdf_filename, atmpres=False):

# should function this
for var in VEL.data_vars:
# check if any filtering before other qaqc
VEL = filter.apply_butter_filt(VEL, var)
VEL = filter.apply_med_filt(VEL, var)

VEL = qaqc.trim_min(VEL, var)
VEL = qaqc.trim_max(VEL, var)
VEL = qaqc.trim_min_diff(VEL, var)
Expand Down
207 changes: 207 additions & 0 deletions stglib/core/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
import numpy as np
import scipy.signal as spsig
import xarray as xr

from . import utils


def butter_filt(sig, sr, cutfreq, ftype, ford=4, axis=-1):
    """
    Butterworth filter applied forward and backward with sosfiltfilt in scipy.signal

    Parameters
    ----------
    sig - signal to be filtered (array)
    sr - sample rate of signal (Hz)
    cutfreq - cutoff frequency for filter (Hz); scalar for lowpass/highpass,
        length 2 ([low, high]) for bandpass
    ftype - type of filter, options = ['lowpass', 'highpass', 'bandpass']
    ford - filter order (default = 4)
    axis - axis of sig along which the filter is applied (default = -1, the last
        axis, which is also the scipy default)

    Returns
    -------
    filtered signal (array with the same shape as sig) using specified order
    (default = 4) butterworth filter
    """
    # Second-order sections are used for the filter design because they are
    # passed straight to sosfiltfilt below.
    sos = spsig.butter(ford, cutfreq, btype=ftype, fs=sr, output="sos")

    return spsig.sosfiltfilt(sos, sig, axis=axis)


def make_butter_filt(ds, var, sr, cutfreq, ftype):
    """
    Create smoothed data using specified butterworth filter type, order, and
    cutoff for a user specified variable

    The filter order is read from the ``filter_order`` global attribute and
    defaults to 4 when that attribute is absent.

    Supported variable layouts and the dimension that gets filtered:
      - ('time',)           -> filtered along 'time'
      - ('time', 'sample')  -> each burst filtered along 'sample'
      - ('time', 'z')       -> each z level filtered along 'time'
    Any other layout is reported with a printed message and left untouched.

    Parameters
    ----------
    ds - xarray dataset that contains user specified variable
    var - user specified variable
    sr - sample rate of data (hertz)
    cutfreq - cutoff frequency(s) for filter (hertz)
    ftype - user specified filter type (lowpass, highpass, or bandpass)

    Returns
    -------
    ds - dataset with specified variable smoothed/filtered with the specified
        butterworth filter type, order, and cutoff; a note describing the
        filter is added via utils.insert_note
    """
    ford = ds.attrs.get("filter_order", 4)

    dims = ds[var].dims
    ndim = ds[var].ndim

    # Work out which dimension to filter along and the label used in the
    # progress message for that layout.
    if ndim == 1 and "time" in dims:
        filt_dim, label = "time", ""
    elif ndim == 2 and "time" in dims and "sample" in dims:
        filt_dim, label = "sample", " burst data"
    elif ndim == 2 and "time" in dims and "z" in dims:
        filt_dim, label = "time", " profile data"
    else:
        print(
            f"Not able to apply {ftype} filter because only 'time' , ('time','sample'), or ('time','z') dimensions are handled and {var} dims are {ds[var].dims}"
        )
        return ds

    print(f"Applying {ftype} filter to {var}{label}")

    # Filter every 1-D slice along the chosen dimension on the raw array; this
    # gives the same result as selecting each burst / z level one at a time.
    # NOTE(review): NaN or fill values in a slice are expected to propagate
    # through the filter output - confirm inputs are gap-free before filtering.
    axis = list(dims).index(filt_dim)
    filtered = np.apply_along_axis(
        butter_filt, axis, ds[var].values, sr, cutfreq, ftype, ford
    )
    # Write back into the existing variable rather than replacing it.
    ds[var][:] = filtered

    notetxt = f"Values filtered using order = {ford} butterworth {ftype} filter with {cutfreq} Hz cutoff frequency. "
    ds = utils.insert_note(ds, var, notetxt)

    return ds


def apply_butter_filt(ds, var):
    """
    Construct and call butterworth filter from user specified config.yaml file

    Looks for ``<var>_lowpass_filt``, ``<var>_highpass_filt`` or
    ``<var>_bandpass_filt`` in the global attributes. The attribute value is a
    cutoff period (two periods for bandpass) and is converted to frequency as
    1 / period. Only one filter is applied per variable; when several are
    present the precedence is lowpass, then highpass, then bandpass. If none
    are present the dataset is returned unchanged.

    Parameters
    ----------
    ds - xarray dataset with user specified variable
    var - user specified variable

    Returns
    -------
    ds - dataset with specified variable smoothed/filtered with the specified
        butterworth filter type, order, and cutoff

    Raises
    ------
    ValueError - if a filter is requested but neither ``sample_rate`` nor
        ``sample_interval`` exists in the global attributes
    """
    ftype = next(
        (
            candidate
            for candidate in ("lowpass", "highpass", "bandpass")
            if f"{var}_{candidate}_filt" in ds.attrs
        ),
        None,
    )
    if ftype is None:
        return ds

    # sample_rate takes priority; otherwise derive it from sample_interval.
    if "sample_rate" in ds.attrs:
        sr = ds.attrs["sample_rate"]
    elif "sample_interval" in ds.attrs:
        sr = 1 / ds.attrs["sample_interval"]
    else:
        raise ValueError(
            f"sample_rate or sample_interval do not exist in global attributes, can not apply {ftype} filter to {var}. "
        )

    period = ds.attrs[f"{var}_{ftype}_filt"]
    if ftype == "bandpass":
        # The filter design needs [low, high] frequencies; sorting makes the
        # result independent of the order in which the two periods were given.
        cutfreq = sorted([1 / period[0], 1 / period[1]])
    else:
        cutfreq = 1 / period

    return make_butter_filt(ds, var, sr, cutfreq, ftype)


def apply_med_filt(ds, var):
    """
    Construct and apply N-point median filter to user specified variable and N

    N (the kernel size) is read from the ``<var>_med_filt`` global attribute;
    if that attribute is absent the dataset is returned unchanged.

    Supported variable layouts and the dimension that gets filtered:
      - ('time',)           -> filtered along 'time'
      - ('time', 'sample')  -> each burst filtered along 'sample'
      - ('time', 'z')       -> each z level filtered along 'time'
    Any other layout is reported with a printed message and left untouched.

    Parameters
    ----------
    ds - xarray dataset with user specified variable
    var - user specified variable

    Returns
    -------
    ds - dataset with user specified variable smoothed/filtered with the user
        specified N points (kernel size); a note describing the filter is added
        via utils.insert_note

    Raises
    ------
    ValueError - if the kernel size is not a positive odd whole number
    """
    attr_name = f"{var}_med_filt"
    if attr_name not in ds.attrs:
        return ds

    kernel_size = ds.attrs[attr_name]
    # The modulo test alone lets negative odd values through (-3 % 2 == 1),
    # so positivity is checked explicitly as well.
    if kernel_size % 2 != 1 or kernel_size < 1:
        raise ValueError(
            f"Not able to apply median filter because kernel size specified {kernel_size} is not an odd whole number"
        )
    # An integral float such as 5.0 passes the check above; use a true integer
    # from here on because the kernel size is a window length.
    kernel_size = int(kernel_size)

    dims = ds[var].dims
    ndim = ds[var].ndim

    # Work out which dimension to filter along and the label used in the
    # progress message for that layout.
    if ndim == 1 and "time" in dims:
        filt_dim, label = "time", ""
    elif ndim == 2 and "time" in dims and "sample" in dims:
        filt_dim, label = "sample", " burst data"
    elif ndim == 2 and "time" in dims and "z" in dims:
        filt_dim, label = "time", " profile data"
    else:
        print(
            f"Not able to apply median filter because only 'time', ('time','sample'), or ('time', 'z') dimensions are handled and {var} dims are {ds[var].dims}"
        )
        return ds

    print(f"Applying {kernel_size} point median filter to {var}{label}")

    # Run the 1-D median filter over every slice along the chosen dimension;
    # this gives the same result as selecting each burst / z level in turn.
    axis = list(dims).index(filt_dim)
    filtered = np.apply_along_axis(spsig.medfilt, axis, ds[var].values, kernel_size)
    # Write back into the existing variable rather than replacing it.
    ds[var][:] = filtered

    notetxt = f"Values filtered using {kernel_size} point median filter. "
    ds = utils.insert_note(ds, var, notetxt)

    return ds
Loading

0 comments on commit fa7e4a0

Please sign in to comment.