-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- use interpolating splines for neighbours > 2 (precisely: for more than 4 data points to interpolate from) - unit tests. - skipping throws error (!) => don't use skip - fix detection whether testthat is attached already
- Loading branch information
Showing
3 changed files
with
112 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,53 +1,133 @@ | ||
##' Impute missing data points | ||
##' | ||
##' Replace \code{NA}s in the spectra matrix by linear interpolation. | ||
##' @rdname spc.NA.linapprox | ||
##' Replace \code{NA}s in the spectra matrix by interpolation. With | ||
##' less than 4 points available linear interpolation of the 2 neighbour points is used. For larger numbers of | ||
##' neighbour points, smoothing interpolation is performed by | ||
##' \code{\link[stats]{smooth.spline}}. | ||
##' @note The function has been renamed from \code{spc.NA.linapprox} to \code{spc.NA.approx} | ||
##' @param spc hyperSpec object with spectra matrix containing \code{NA}s | ||
##' @param neighbours how many neighbour data points should be used to fit the line | ||
##' @param ... ignored | ||
##' @param neighbours how many neighbour data points should be used to fit the | ||
##' line | ||
##' @param w,df,spar see \code{\link[stats]{smooth.spline}} | ||
##' @return hyperSpec object | ||
##' @export | ||
##' @author Claudia Beleites | ||
spc.NA.linapprox <- function (spc, neighbours = 1, ...){ | ||
##' @examples | ||
##' spc.NA.approx (fluNA [,, min ~ 410], debuglevel = 1) | ||
##' spc.NA.approx (fluNA [1,, min ~ 410], debuglevel = 2) | ||
##' spc.NA.approx (fluNA [4,, min ~ 410], neighbours = 3, df = 4, debuglevel = 2) | ||
spc.NA.approx <- function (spc, neighbours = 1, w = rep (1, 2 * neighbours), df = 1 + .Machine$double.eps, spar = NULL, | ||
debuglevel = hy.getOption("debuglevel")){ | ||
chk.hy (spc) | ||
validObject (spc) | ||
|
||
all.na <- which (apply (is.na (spc@data$spc), 1, all)) | ||
if (length (all.na) > 0){ | ||
warning ("Spectra containing only NAs found. They will not be approximated.") | ||
} | ||
|
||
stopifnot (neighbours >= 1L) | ||
|
||
ispc <- which (is.na (spc@data$spc), arr.ind = TRUE) | ||
|
||
ispc <- setdiff (unique (ispc[,"row"]), all.na) | ||
|
||
if (debuglevel == 1L) | ||
plot (spc [ispc], col = "gray") | ||
|
||
for (i in ispc){ | ||
if (debuglevel == 2L) | ||
plot (spc [i], col = "gray") | ||
|
||
nas <- which (is.na (spc@data$spc[i,])) | ||
|
||
start <- c (0, which (diff (nas) > 1)) + 1 | ||
end <- c (start [-1] - 1, length (nas)) | ||
end <- c (start [-1] - 1, length (nas)) | ||
|
||
for (j in seq (along = start)) { | ||
pts <- nas [start [j]] : nas [end [j]] | ||
|
||
xneighbours <- c ( -(1 : neighbours) + nas [start [j]], | ||
(1 : neighbours) + nas [end [j]]) | ||
xneighbours <- xneighbours [xneighbours > 0] | ||
xneighbours <- xneighbours [xneighbours < nwl (spc)] | ||
(1 : neighbours) + nas [end [j]]) | ||
mask <- xneighbours > 0 & xneighbours <= nwl (spc) | ||
xneighbours <- xneighbours [mask] | ||
|
||
if (sum (mask) == 0) {# should not happen as all NA-only spectra were excluded | ||
stop ("No data to interpolate from.") | ||
|
||
if (length (xneighbours) == 0) # should not happen as all NA-only spectra were excluded | ||
stop ("No data to interpolate from.") | ||
else if (length (xneighbours) == 1) | ||
} else if (sum (mask) == 1) { | ||
spc@data$spc [i, pts] <- spc@data$spc [i, xneighbours] | ||
else | ||
|
||
if (debuglevel == 2L) | ||
points (x = spc@wavelength [xneighbours], y = spc@data$spc [i, xneighbours]) | ||
|
||
} else if (sum (mask) < 4) { # old behaviour using linear interpolation | ||
spc@data$spc [i, pts] <- approx (x = spc@wavelength [xneighbours], | ||
y = spc@data$spc [i, xneighbours], | ||
xout = spc@wavelength [pts], | ||
method = "linear", | ||
rule = 2)$y | ||
|
||
if (debuglevel == 2L) | ||
lines (x = spc@wavelength [xneighbours], y = spc@data$spc [i, xneighbours]) | ||
} else {# more neighbours: interpolation spline | ||
spline <- smooth.spline(x = spc@wavelength [xneighbours], | ||
y = spc@data$spc [i, xneighbours], | ||
w = w [mask], df = df, spar = spar, | ||
cv = FALSE, all.knots = TRUE, keep.data = FALSE) | ||
spc@data$spc [i, pts] <- predict (spline, x = spc@wavelength [pts])$y | ||
|
||
if (debuglevel == 2L) { | ||
wlr <- seq (from = min (spc@wavelength [xneighbours]), | ||
to = max (spc@wavelength [xneighbours]), | ||
length.out = 100) | ||
lines (predict (spline, wlr)) | ||
} | ||
|
||
} | ||
|
||
if (debuglevel == 2L) | ||
plot (spc [i,,xneighbours, wl.index = TRUE], add = TRUE, lines.args = list (type = "p", pch = 20), col = 1) | ||
if (debuglevel >= 1L) | ||
plot (spc [i,,pts, wl.index = TRUE], add = TRUE, lines.args = list (type = "p", pch = 20), col = 2) | ||
} | ||
} | ||
|
||
spc | ||
} | ||
|
||
##' @rdname spc.NA.approx | ||
## TODO: remove after 2017-06-01 | ||
spc.NA.linapprox <- function (...){ | ||
stop ("spc.NA.linapprox has been renamed to spc.NA.approx") | ||
} | ||
|
||
.test (spc.NA.approx) <- function (){ | ||
context ("spc.NA.approx") | ||
|
||
test_that ("linear interpolation", { | ||
tmp <- spc.NA.approx (fluNA [-2,, min ~ 410]) | ||
expect_equivalent(tmp [[,, 406]], rowMeans (fluNA [[-2,, 405.5 ~ 406.5]], na.rm = TRUE)) | ||
}) | ||
|
||
test_that ("spline interpolation", { | ||
tmp <- spc.NA.approx (fluNA [-2,, min ~ 410], neighbours = 2) | ||
expect_equivalent (tmp [[,, 406]], rowMeans (fluNA [[-2,, 405 ~ 407]], na.rm = TRUE), | ||
tolerance = 1e-5) | ||
}) | ||
|
||
test_that ("edge treatment and debuglevel", { | ||
ranges <- list (405 ~ 407, 405.5 ~ 406.5, 405.6 ~ 406) | ||
for (d in 0 : 2) { | ||
for (r in ranges) { | ||
tmp <- spc.NA.approx (fluNA [-2,, r], neighbours = 3, debuglevel = d) | ||
expect_equivalent (tmp [[,, 406]], rowMeans (fluNA [[-2,, r]], na.rm = TRUE), | ||
tolerance = 1e-5, info = paste0 ("debuglevel = ", d, "range = ", r)) | ||
} | ||
} | ||
}) | ||
|
||
|
||
test_that ("spc.NA.linapprox deprecated",{ | ||
expect_error(spc.NA.linapprox (fluNA)) | ||
}) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters