Skip to content

Commit

Permalink
Merge branch 'alphadev'
Browse files Browse the repository at this point in the history
  • Loading branch information
Francesco Grossetti committed May 27, 2017
2 parents 9967280 + 8433b24 commit 544de9f
Show file tree
Hide file tree
Showing 9 changed files with 242 additions and 14 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(augment)
export(prevplot)
export(quake)
export(survplot)
import(data.table)
importFrom(grDevices,dev.cur)
Expand Down
19 changes: 18 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
------

# msmtools 1.3

### Major changes

* The new function `quake()` is introduced. This adds support in the preprocessing
part of the analysis. `quake()` addresses the specific problem of different
transitions occurring at the same exact time. This is a case for which a
multi-state model fails to estimate the probability associated with the
two transitions.

### Minor changes

* Global variables are now correctly declared on top of functions using
Expand All @@ -23,6 +33,13 @@ francesco.grossetti@unibocconi.it.
first call. This means that you can finally print on console the augmented dataset
right away.

* `pandoc` versions prior 1.17 does not fully support spaces in file names and
caused a warning when compiling `msmtools` under Fedora using both `clang`
ang `gcc`. Now all file names are without spaces. `msmtools` 1.3 has been built
using `pandoc` 1.19.2 and `pandoc-citeproc` 0.10.4.1

------

# msmtools 1.2

### Major changes
Expand Down Expand Up @@ -75,7 +92,7 @@ blocked the object defined by `n_events`.
produced during the status flag assignment. This was due to a wrong rounding of the amount of
augmenting factor for each unit.

---
------

# msmtools 1.1

Expand Down
4 changes: 2 additions & 2 deletions R/augment.R
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ augment = function( data, data_key, n_events, pattern, state = list ( 'IN', 'OUT
}
if ( check_NA == TRUE ) {
if ( verbose == TRUE ) {
message( 'checking for any missing data in function arguments' )
message( 'checking for any missing values in function arguments' )
}
checks = c( cols, pattern, t_start, t_end )
test = apply( data[ , checks, with = FALSE ], 2, function( x ) any( sum( is.na( x ) ) > 0 ) )
Expand All @@ -298,7 +298,7 @@ augment = function( data, data_key, n_events, pattern, state = list ( 'IN', 'OUT
invisible( sapply( names( test[ test == TRUE ] ), function( x ) cat( x, '\n' ) ) )
stop( 'Please, fix the issues and relaunch augment()' )
} else {
cat( 'Ok, no missing data detected\n')
cat( 'Ok, no missing values detected\n')
cat( '---\n' )
}
}
Expand Down
156 changes: 156 additions & 0 deletions R/quake.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#' Delete different events occurring at the same time
#'
#' Fast algorithm to get rid of transitions to different states occurring at the same exact time
#' when dealing with augmented data as computed by \code{augment}.
#'
#' @param data An augmented \code{data.table} or \code{data.frame} object where each row
#' represents a transition. If \code{data} is a \code{data.frame}, then \code{quake} internally
#' casts it to a \code{data.table}.
#' @param data_key A keying variable which \code{quake} uses to define a key for \code{data}.
#' This represents the subject ID (See \code{\link[data.table]{setkey}}).
#' @param pattern ID status at the end of the study as passed to \code{augment} (See
#' \code{\link[msmtools]{augment}}).
#' @param target The target variable to check duplicates. By default it is set to 'augmented_int'.
#' @param check_NA If \code{TRUE}, then arguments \code{data_key}, \code{pattern},
#' and \code{target} are looked up for any missing data and if the function finds
#' any, it stops with error. Default is \code{FALSE}.
#' @param verbose If \code{FALSE}, all information produced by \code{print}, \code{cat} and
#' \code{message} are suppressed. All is done internally so that no global
#' options are changed. \code{verbose} can be set to \code{FALSE} on all common OS
#' (see also \code{\link[base]{sink}} and \code{\link[base]{options}}). Default is \code{TRUE}.
#'
#' @details blablabla some details to write down
#' @seealso \code{\link[msmtools]{augment}}
#'
#' @examples
#' data( hosp )
#' hosp_aug = augment( data = hosp, data_key = subj, n_events = adm_number, pattern = label_3,
#' t_start = dateIN, t_end = dateOUT, t_cens = dateCENS )
#' hosp_aug_clean = quake( data = hosp_aug, data_key = subj, pattern = label_3 )
#'
#' @author Francesco Grossetti \email{francesco.grossetti@@polimi.it}.
#' @import data.table
#' @export

quake = function( data, data_key, pattern, target, check_NA = FALSE, verbose = TRUE ) {

tic = proc.time()
index = NULL

if ( missing( data ) ) {
stop( 'a dataset of class data.table or data.frame must be provided' )
}
if ( !inherits( data, "data.table" ) && !inherits( data, "data.frame" ) ) {
stop( "a dataset of class data.table or data.frame must be provided" )
}
if ( missing( data_key ) ) {
stop( 'a variable of keying must be provided' )
}
if ( missing( pattern ) ) {
stop( "a pattern must be provided" )
}
if ( inherits( data, 'data.frame' ) ) {
setDT( data )
}
if ( verbose == TRUE ) {
cat( '-------------------------------------\n' )
cat( '# # # # setting everything up # # # #\n' )
cat( '-------------------------------------\n' )
}

setkey( data, NULL )
cols = as.character( substitute( list( data_key ) )[ -1L ] )
if ( !length( cols ) ) {
cols = colnames( data )
}
setkeyv( data, cols )
pattern = as.character( substitute( list( pattern ) )[ -1L ] )
if ( missing( target ) ) {
target = 'augmented_int'
} else {
target = as.character( substitute( list( target ) )[ -1L ] )
}

if ( check_NA == TRUE ) {
if ( verbose == TRUE ) {
message( 'checking for any missing values in function arguments' )
}
checks = c( cols, pattern, target )
test = apply( data[ , checks, with = FALSE ], 2, function( x ) any( sum( is.na( x ) ) > 0 ) )
if ( any ( test ) ) {
cat( '---\n' )
if ( verbose == TRUE ) {
message( 'detected missing values in the following variables:' )
}
invisible( sapply( names( test[ test == TRUE ] ), function( x ) cat( x, '\n' ) ) )
stop( 'Please, fix the issues and relaunch shiver()' )
} else {
cat( 'Ok, no missing values detected\n')
cat( '---\n' )
}
}

data[ , index := sequence( .N ) ]
n_patients = uniqueN( eval( substitute( data$cols ) ) )
values = sort( eval( substitute( unique( data$pattern ) ) ) )
if ( length( values ) < 2 ) {
stop( 'unit identification label must be an integer, a factor or a character
with at least 2 elements' )
}

alive = data[ get( pattern ) == values[ 1 ] ]
alive.last = alive[ alive[ , .I[ .N ], by = eval( cols ) ]$V1 ]
setkey( alive.last, index )
setkey( alive, index )
alive.no.last = alive[ !alive.last ]

if ( verbose == TRUE ) {
message( 'checking ', substitute( pattern ), ' and defining patterns' )
}
if ( length( values ) == 2 ) {
cat( 'detected only 2 values\n' )
cat( '---\n' )
dead = data[ get( pattern ) == values[ 2 ] ]
} else if ( length( values ) == 3 ) {
cat( 'Ok, detected 3 values\n' )
dead = data[ get( pattern ) != values[ 1 ] ]
cat( '---\n' )
}

l = list( alive.no.last, dead )
data.no.last.event = rbindlist( l )
row.duplicated = duplicated( data.no.last.event, by = c( eval( cols ), eval( target ) ) )
duplicated = data.no.last.event[ row.duplicated == TRUE ]
n_duplicated = uniqueN( eval( substitute( duplicated$cols ) ) )
setkeyv( duplicated, cols )

if ( n_duplicated == 0 ) {
message( 'Hurray! No duplicated occurrences have been found in ', substitute( data ),
' according to variable ', substitute( target ) )
} else {
message( 'Spotted ', n_duplicated,
' patients with at least a duplicated occurrence according to variable ',
substitute( target ) )
data.clean = data[ !duplicated ]
n_patients.to.keep = uniqueN( eval( substitute( data.clean$cols ) ) )
cat( n_patients.to.keep, ' patients have been reained corresponding to ',
round( 100 * ( n_patients.to.keep / n_patients ), 2 ), '%\n', sep = '' )
cat( 'Duplicated patients have been sucessfully removed\n' )
}

data[ , index := NULL ]
if ( n_duplicated > 0 ) {
data.clean[ , index := NULL ]
}
toc = proc.time()
time = toc - tic
cat( '---------------------------\n' )
cat( 'quake() took:', time[ 3 ], 'sec. \n', sep = ' ' )
cat( '---------------------------\n' )

if ( n_duplicated == 0 ) {
return( invisible( data ) )
} else {
return( invisible( data.clean ) )
}
}
5 changes: 3 additions & 2 deletions R/survplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ if ( getRversion() >= "2.15.1" ) {
#' legend is not shown.
#' @param xlab \emph{x} axis label.
#' @param ylab \emph{y} axis label.
#' @param main The main title of the plot(s) as character. Default is \code{NULL}.
#' @param lty.fit Line type for the fitted curve. See \code{\link[graphics]{par}}.
#' @param lwd.fit Line width for the fitted curve. See \code{\link[graphics]{par}}.
#' @param col.fit Line color for the fitted curve. See \code{\link[graphics]{par}}.
Expand Down Expand Up @@ -210,7 +211,7 @@ survplot = function( x, from = 1, to = NULL, range = NULL, covariates = "mean",
convert = FALSE, add = FALSE,
ci = c( "none", "normal", "bootstrap" ), interp = c( "start", "midpoint" ),
B = 100L, legend.pos = 'topright',
xlab = "Time", ylab = "Survival Probability",
xlab = "Time", ylab = "Survival Probability", main = NULL,
lty.fit = 1, lwd.fit = 1, col.fit = "red", lty.ci.fit = 3, lwd.ci.fit = 1,
col.ci.fit = col.fit, mark.time = FALSE,
lty.km = 5, lwd.km = 1, col.km = "darkblue",
Expand Down Expand Up @@ -288,7 +289,7 @@ survplot = function( x, from = 1, to = NULL, range = NULL, covariates = "mean",
dev.set( dev.cur() )
}
plot( times, 1 - pr, type = "l", xlab = xlab, ylab = ylab, ylim = c( 0, 1 ),
lwd = lwd.fit, lty = lty.fit, col = col.fit )
lwd = lwd.fit, lty = lty.fit, col = col.fit, main = main )
} else {
lines( times, 1 - pr, lwd = lwd.fit, lty = lty.fit, col = col.fit )
}
Expand Down
51 changes: 51 additions & 0 deletions man/quake.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/survplot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 8 additions & 8 deletions vignettes/msmtools.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
## ----ninja, echo = FALSE-------------------------------------------------
library( msmtools )

## ----long example, collapse = TRUE---------------------------------------
## ----long_example, collapse = TRUE---------------------------------------
data( hosp )
hosp[ 1:17, .( subj, adm_number, gender, age, label_2,
dateIN, dateOUT, dateCENS ) ]

## ----running augment, collapse = TRUE------------------------------------
## ----running_augment, collapse = TRUE------------------------------------
hosp_augmented = augment( data = hosp, data_key = subj,
n_events = adm_number, pattern = label_2,
t_start = dateIN, t_end = dateOUT,
Expand All @@ -19,7 +19,7 @@ hosp_augmented[ 1:35, .( subj, adm_number, gender, age, label_2,
hosp[ 18:28, .( subj, adm_number, rehab, it, rehab_it,
dateIN, dateOUT, dateCENS ) ]

## ----complex status, collapse = TRUE-------------------------------------
## ----complex_status, collapse = TRUE-------------------------------------
hosp_augmented_more = augment( data = hosp, data_key = subj,
n_events = adm_number, pattern = label_2,
t_start = dateIN, t_end = dateOUT,
Expand All @@ -29,7 +29,7 @@ hosp_augmented_more = augment( data = hosp, data_key = subj,
hosp_augmented_more[ 36:60, .( subj, adm_number, rehab_it,
augmented, status, status_exp, n_status_exp ) ]

## ----multistate model, collapse = TRUE-----------------------------------
## ----multistate_model, collapse = TRUE-----------------------------------
# let's define the initial transition matrix for our model
Qmat = matrix( data = 0, nrow = 3, ncol = 3, byrow = TRUE )
Qmat[ 1, 1:3 ] = 1
Expand All @@ -49,15 +49,15 @@ msm_model = msm( status_num ~ augmented_int,
control = list( fnscale = 6e+05, trace = 0,
REPORT = 1, maxit = 10000 ) )

## ----survplot 1, fig.align = 'center', fig.width = 5, fig.height = 4-----
## ----survplot_1, fig.align = 'center', fig.width = 5, fig.height = 4-----
survplot( msm_model, km = TRUE, ci = 'none',
verbose = FALSE, devnew = FALSE )

## ----survplot 2, fig.align = 'center', fig.width = 5, fig.height = 4-----
## ----survplot_2, fig.align = 'center', fig.width = 5, fig.height = 4-----
survplot( msm_model, km = TRUE, from = 2, ci = 'none',
verbose = FALSE, devnew = FALSE )

## ----custom time seq, fig.align = 'center', fig.width = 5, fig.height = 4----
## ----custom_time_seq, fig.align = 'center', fig.width = 5, fig.height = 4----
time_seq = seq( 300, 800, by = 30 )
survplot( msm_model, times = time_seq, ci = 'none',
verbose = FALSE, devnew = FALSE )
Expand Down Expand Up @@ -99,7 +99,7 @@ all_data = survplot( msm_model, ci = 'none', grid = 10,
# let's see the datasets
all_data

## ----splitting data, collapse = TRUE-------------------------------------
## ----splitting_data, collapse = TRUE-------------------------------------
# do not extract data using just one [].
# This keeps the class, so it returns a list
km_data_wrong = all_data[ 1 ]
Expand Down
Binary file modified vignettes/msmtools.pdf
Binary file not shown.

0 comments on commit 544de9f

Please sign in to comment.