From b053862eb3b99b5bd010d745e3868b4d3308abaa Mon Sep 17 00:00:00 2001 From: Rebecca Killick Date: Mon, 14 Oct 2024 04:51:43 +0100 Subject: [PATCH] Changed the plot(object,diagnostic=TRUE) for cpt.range objects Added in the upper bound on the pen.value.full for CROPS output --- NEWS | 2 ++ R/class_input.R | 18 +++++++++--------- R/cpt.class.R | 5 +++-- man/plot-methods.Rd | 4 ++-- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/NEWS b/NEWS index 7220de0..9b39332 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,8 @@ Version 2.3 * Added some generics for cpt.range that were missing before; nseg, seg.len. Previously these inherited from cpt, but now these can be called on the @cpts slot (default) or optionally with a ncpts argument for the specified number of changepoints. * Moved the fit functions outside of the param functions. Previously these were unnecessarily repeated across cpt, cpt.reg and cpt.range classes. This necessitated the adding of the generics that we missing for cpt.range so it worked seamlessly across classes. * Repeated code to calculate the cpts from a ncpts argument in cpt.range methods was removed. +* The diagnostic plot for cpt.range objects (plot(object,diagnostic=TRUE)) has been changed to be more informative. The axes have been swapped and instead of a line graph, a stepped graph (type="s") is used, which better reflects that you can't get fractional numbers of changepoints. This can be overridden with another type if preferred. Reworded the man file for plot-methods to reflect this change. +* Added the upper bound on the tested penalty values to the pen.value.full slot for CROPS output. Previously this was automatically removed and so you needed the original function call to know the upper bound that was tested. 
Version 2.2.5 ============= diff --git a/R/class_input.R b/R/class_input.R index db1dcda..34f14dc 100644 --- a/R/class_input.R +++ b/R/class_input.R @@ -4,11 +4,11 @@ class_input <- function(data, cpttype, method, test.stat, penalty, pen.value, mi }else{ ans=new("cpt") } - + data.set(ans)=data;cpttype(ans)=cpttype;method(ans)=method; test.stat(ans)=test.stat;pen.type(ans)=penalty;pen.value(ans)=pen.value;minseglen(ans)=minseglen;ans@date=date(); if(penalty!="CROPS"){ # crops is only one that doesn't give a single set of cpts cpts(ans)=out[[2]] - + if(param.estimates==TRUE){ if(test.stat == "Gamma"){ ans=param(ans, shape) @@ -17,7 +17,7 @@ class_input <- function(data, cpttype, method, test.stat, penalty, pen.value, mi } } } - + if(method=="PELT"){ ncpts.max(ans)=Inf } @@ -27,14 +27,14 @@ class_input <- function(data, cpttype, method, test.stat, penalty, pen.value, mi else{ ncpts.max(ans)=Q } - + if(method=="BinSeg"){ l=list() for(i in 1:(length(out$cps)/2)){ - l[[i]] = out$cps[1,1:i] + l[[i]] = out$cps[1,1:i] } m = t(sapply(l, '[', 1:max(sapply(l, length)))) - + cpts.full(ans)=m pen.value.full(ans)=out$cps[2,] }else if(method=="SegNeigh"){ @@ -42,11 +42,11 @@ class_input <- function(data, cpttype, method, test.stat, penalty, pen.value, mi pen.value.full(ans)=-diff(out$like.Q) }else if(penalty=="CROPS"){ m = t(sapply(out[[2]], '[', 1:max(sapply(out[[2]], length)))) - + cpts.full(ans) = m - pen.value.full(ans) = out[[1]][1,] + pen.value.full(ans) = c(out[[1]][1,],pen.value[2]) # add in the final penalty in the range as this is removed as a duplicate set of changepoints if(test.stat=="Gamma"){param.est(ans)$shape=shape} } - + return(ans) } diff --git a/R/cpt.class.R b/R/cpt.class.R index 86090d3..1edf3d2 100644 --- a/R/cpt.class.R +++ b/R/cpt.class.R @@ -668,12 +668,13 @@ setClass("cpt",slots=list(data.set="ts", cpttype="character", method="character" setMethod("plot","cpt.range",function(x,ncpts=NA,diagnostic=FALSE,cpt.col='red',cpt.width=1,cpt.style=1,...){ 
if(diagnostic==TRUE){ n.changepoints = apply(cpts.full(x), 1, function(x) sum(x > 0, na.rm = TRUE)) + n.changepoints=c(n.changepoints,n.changepoints[length(n.changepoints)]) # repeat the last value as this is also the number of changes for the upper pen.value tested penalty.values = pen.value.full(x) if (is.null(list(...)$type)) { # By default, the type of the diagnostic plots is "lines". - plot(x = n.changepoints, y = penalty.values, xlab = 'Number of Changepoints', ylab = 'Penalty Value', type = "l", ...) + plot(x = penalty.values, y = n.changepoints, type="s",ylab = 'Number of Changepoints', xlab = 'Penalty Value', ...) } else { - plot(x = n.changepoints, y = penalty.values, xlab = 'Number of Changepoints', ylab = 'Penalty Value', ...) + plot(x = penalty.values, y = n.changepoints,ylab = 'Number of Changepoints', xlab = 'Penalty Value', ...) } return(invisible(NULL)) } diff --git a/man/plot-methods.Rd b/man/plot-methods.Rd index d4169e3..6cb71b4 100644 --- a/man/plot-methods.Rd +++ b/man/plot-methods.Rd @@ -20,7 +20,7 @@ Plots the data and identifies the changepoints using vertical lines (change in variance), horizontal lines (change in mean). Optional arguments to control the lines: \code{cpt.col} equivilent to \code{col} to change the colour of the changepoint line; \code{cpt.width} equivilent to \code{lwd} to change the width of the changepoint line; \code{cpt.style} equivilent to \code{lty} to change the style of the line. } \item{\code{signature(x = "cpt.range")}}{ - As for the \code{cpt} objects except for two optional arguments, \code{ncpts} and \code{diagnostic}. The \code{ncpts} option allows you to specify a plot of the segmentation with \code{ncpts} changepoints in, i.e. the optimal may be specified as 10 changes but you want to plot the segmentation with 5 changes (provided a segmentation with 5 changes is listed in \code{cpts.full(x)}. 
The \code{diagnostic} option when set to \code{TRUE} plots the number of changepoints in each segmentation against the change in test statistic when adding that change. This can aide the decision on the number of changepoints as when a true changepoint is added the cost increases/decreases rapidly, but when a changepoint due to noise is added the change is small. This is akin to a scree plot in principal component analysis. The idea is that someone may choose to create a plot using \code{diagnostic=TRUE}, identify the appropriate number of changes and then replot using \code{ncpts} to visualize that segmentation. + As for the \code{cpt} objects except for two optional arguments, \code{ncpts} and \code{diagnostic}. The \code{ncpts} option allows you to specify a plot of the segmentation with \code{ncpts} changepoints in, i.e. the optimal may be specified as 10 changes but you want to plot the segmentation with 5 changes (provided a segmentation with 5 changes is listed in \code{cpts.full(x)}). The \code{diagnostic} option when set to \code{TRUE} plots the number of changepoints in each segmentation against the penalty values that give that number of changepoints. This can aid the decision on the number of changepoints as when a true changepoint is added the cost decreases considerably, so it creates a stable region where several penalty values give the same number of changepoints, but when a changepoint due to noise is added the change in cost is small and so a small change in penalty value can vary the number of changes a lot. This is akin to a scree plot in principal component analysis. The idea is that someone may choose to create a plot using \code{diagnostic=TRUE}, identify the appropriate number of changes and then replot using \code{ncpts} to visualize that segmentation. } \item{\code{signature(x = "cpt.reg")}}{ Plotting is only valid for one regressor. Plots the regressor against the response and identifies the changepoints using horizontal lines. 
Optional arguments to control the lines: \code{cpt.col} equivilent to \code{col} to change the colour of the changepoint line; \code{cpt.width} equivilent to \code{lwd} to change the width of the changepoint line; \code{cpt.style} equivilent to \code{lty} to change the style of the line. @@ -30,4 +30,4 @@ \keyword{methods} \keyword{plot} \keyword{cpt} -\keyword{internal} \ No newline at end of file +\keyword{internal}