Skip to content

Commit

Permalink
Merge pull request #84 from jasenfinch/devel
Browse files Browse the repository at this point in the history
v0.14.7
  • Loading branch information
Jasen Finch authored Dec 17, 2021
2 parents 9768f74 + aa5b886 commit 2b6fe7b
Show file tree
Hide file tree
Showing 104 changed files with 5,942 additions and 10,052 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: metabolyseR
Title: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data
Version: 0.14.6
Version: 0.14.7
Authors@R: person("Jasen", "Finch", email = "jsf9@aber.ac.uk", role = c("aut", "cre"))
Description: A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data.
URL: https://jasenfinch.github.io/metabolyseR
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ importFrom(magrittr,set_colnames)
importFrom(magrittr,set_names)
importFrom(magrittr,set_rownames)
importFrom(methods,"slot<-")
importFrom(methods,as)
importFrom(methods,new)
importFrom(methods,show)
importFrom(methods,slot)
Expand Down
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# metabolyseR 0.14.7

* Single replicate classes now automatically removed by [`plotLDA()`](https://jasenfinch.github.io/metabolyseR/reference/plotLDA.html).

# metabolyseR 0.14.6

* [`plotExplanatoryHeatmap`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html) method for the [`Analysis`](https://jasenfinch.github.io/metabolyseR/reference/Analysis-class.html) class now returns the plot only if the number of plots is equal to 1.
* [`plotExplanatoryHeatmap()`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html) method for the [`Analysis`](https://jasenfinch.github.io/metabolyseR/reference/Analysis-class.html) class now returns the plot only if the number of plots is equal to 1.

* Removed reference to the `nCores` parameter from the documentation example of [`metabolyse()`](https://jasenfinch.github.io/metabolyseR/reference/metabolyse.html).

Expand Down
19 changes: 19 additions & 0 deletions R/allClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,22 @@ setClass('Univariate',
models = 'list',
results = 'tbl_df'
))

setClass('LDA',
contains = 'AnalysisData',
slots = list(
stats = 'tbl_df',
Tw = 'numeric',
rankmat = 'numeric',
means = 'numeric',
loadings = 'tbl_df',
x = 'tbl_df',
xmeans = 'tbl_df',
pred = 'factor',
cl = 'factor',
prior = 'numeric',
conf = 'table',
acc = 'numeric',
lev = 'character',
call = 'call'
))
291 changes: 153 additions & 138 deletions R/nlda.R
Original file line number Diff line number Diff line change
@@ -1,141 +1,156 @@
## FIEmspro https://github.com/aberHRML/FIEmspro nlda functionality
## Based on FIEmspro https://github.com/aberHRML/FIEmspro nlda functionality

#'@importFrom e1071 naiveBayes
#'@importFrom stats cov predict
setGeneric('nlda',function(x,cls = 'class',prior = NULL,scale = FALSE,comprank = FALSE,...)
standardGeneric('nlda'))

`nlda.default` <-
function(dat,cl, prior=NULL,scale=FALSE,comprank = FALSE,...)
{
if (missing(dat) || missing(cl))
stop("data set or class are missing")
dat <- as.matrix(dat)

cl <- as.factor(cl)
if (any(table(cl) == 0)) stop("Can't have empty classes in cl.")

if (nrow(dat) != length(cl)) stop("dat and cl don't match.")
if (length(unique(cl)) < 2)
stop("Classification needs at least two classes.")
if (any(is.na(dat)) || any(is.na(cl)))
stop("NA is not permitted in data set or class labels.")

if(is.null(prior)){prior <- as.vector(table(cl)/length(cl))}
if(is.null(names(prior))){names(prior) <- levels(cl)}

pc <- prcomp(dat,scale=scale)

rankmat <- max(1,ncol(pc$x)-1)

if(comprank == TRUE)
rankmat <- qr(cov(dat)*(dim(dat)[1]-1))$rank
score <- pc$x[,1:rankmat,drop=FALSE]

g <- nlevels(cl)
mx <- apply(score,2,mean)
t <- matrix(0,nrow = rankmat,ncol=rankmat)
W <- matrix(0,nrow = rankmat,ncol=rankmat)
for(j in 1:g){
idx <- which(cl==levels(cl)[j])
L <- length(idx)
K <- score[idx,,drop=FALSE]
zz <- apply(K,2,mean)
A <- K - t(matrix(rep(mx, L),length(mx),L))
C <- K - t(matrix(rep(zz, L),length(zz),L))
t <- t + t(A)%*%A
W <- W + t(C)%*%C
}
B <- t-W

Ng <- nrow(score)-g
P <- W/(Ng)
eP <- eigen(P)
ord <- sort.list(eP$values)
V <- sweep(
eP$vectors[,ord,drop=FALSE],
2,
sqrt(colSums(eP$vectors[,ord,drop=FALSE]^2)),
"/")
Dg <- eP$values[ord]
nDg <- length(Dg)
Dmean <- sum(diag(P))/nDg
Dn <- matrix(0,nDg,nDg)
for(i in 1:nDg)
Dn[i,i] <- max(c(Dg[i],Dmean))

Wn <- V%*%Dn%*%t(V)*Ng
ratio <- solve(Wn)%*%B
er <- eigen(ratio)
ev <- Re(er$values)
ev[Im(er$values)>0] <- 0
vec <- Re(er$vectors)
ord <- sort.list(ev,decreasing=TRUE)
vec <- sweep(
vec[,ord,drop=FALSE],
2,
sqrt(colSums(vec[,ord,drop=FALSE]^2)),
"/")
ev <- ev[ord]
maxg <- min(c(g-1,dim(vec)[1]))
vec <- vec[,1:maxg] ## discriminant functions
Tw <- ev[1:maxg]
names(Tw) <- paste("DF", 1:maxg, sep = "")

## get stats here
flip <- function(x) x[rev(seq_along(x))]
n <- dim(dat)[1]
st <- matrix(0,length(Tw),3)
st[,1] <- round(Tw,3)
st[,2] <- round(Tw*100/sum(Tw),3)
st[,3] <- round(sqrt(Tw/(1+Tw)),3)
st <- as.data.frame(st)
dimnames(st) <- list(
paste("DF", 1:maxg, sep = ""),
c("Eig","Perceig","Cancor"))

res <- list()
res$stats <- st
res$Tw <- Tw
res$rankmat <- rankmat
res$means <- pc$center
res$loadings <- pc$rotation[,1:rankmat,drop=FALSE] %*%
vec ## discriminant functions with PCA

colnames(res$loadings) <- paste("DF", 1:maxg, sep = "")

## rotated data (projection)
x <- sweep(dat, 2, res$means) %*% res$loadings

## group means based on the rotated data
xmeans <- tapply(x, list(rep(cl,ncol(x)),col(x)), mean)
dimnames(xmeans)[[2]] <- colnames(x)

#if(type==1){
# mdist=as.matrix(dist(rbind(xmeans,x)))
# mdist=mdist[1:g,(g+1):ncol(mdist)]
# prob= (1-t(sweep(mdist,2,apply(mdist,2,sum),"/")))/(g-1)
# pred = apply(prob,1,which.max)
# pred <- factor(dimnames(prob)[[2]][pred], levels = levels(cl))
#}
#else{
nbmod <- naiveBayes(data.frame(x),cl)
prob <- predict(nbmod,data.frame(x),type="raw")
pred <- apply(prob,1,which.max)
pred <- factor(levels(cl)[pred], levels = levels(cl))
#}
res$x <- x
res$xmeans <- xmeans
res$pred <- pred
res$cl <- cl
res$prior <- prior
res$conf <- table(cl,pred)
res$acc <- round(sum(diag(res$conf))*100/nrow(dat),2)
res$lev <- levels(cl)
res$call <- match.call()
res$call[[1]] <- as.name("nlda")
class(res) <- "nlda"

return(res)
}
#' @importFrom e1071 naiveBayes
#' @importFrom stats cov predict
#' @importFrom methods as


nlda <- function (dat, ...) UseMethod ("nlda")
setMethod('nlda',signature = 'AnalysisData',
function(x,cls = 'class',prior=NULL,scale=FALSE,comprank = FALSE,...) {

cl <- x %>%
clsExtract(cls)

if (is.numeric(cl))
stop('Classes should not be numeric',call. = FALSE)

cl <- factor(cl,levels = unique(cl))

if (any(table(cl) < 2)) {
remove_classes <- cl %>%
table() %>%
names() %>%
{.[table(cl) < 2]}

x <- x %>%
removeClasses(cls = cls,
classes = remove_classes)

warning(str_c('Classes with a single replicate removed: ',
str_c(str_c('"',
remove_classes,
'"'),
collapse = ', ')),
call. = FALSE)

cl <- x %>%
clsExtract(cls) %>%
factor(levels = unique(.))
}

if (length(unique(cl)) < 2)
stop('More than 1 class needed for PC-LDA.',call. = FALSE)

d <- x %>%
dat() %>%
as.matrix()

if(is.null(prior)){prior <- as.vector(table(cl)/length(cl))}
if(is.null(names(prior))){names(prior) <- levels(cl)}

pc <- prcomp(d,scale=scale)

rankmat <- max(1,ncol(pc$x)-1)

if(comprank == TRUE)
rankmat <- qr(cov(d)*(dim(d)[1]-1))$rank
score <- pc$x[,1:rankmat,drop=FALSE]

g <- nlevels(cl)
mx <- apply(score,2,mean)
t <- matrix(0,nrow = rankmat,ncol=rankmat)
W <- matrix(0,nrow = rankmat,ncol=rankmat)
for(j in 1:g){
idx <- which(cl==levels(cl)[j])
L <- length(idx)
K <- score[idx,,drop=FALSE]
zz <- apply(K,2,mean)
A <- K - t(matrix(rep(mx, L),length(mx),L))
C <- K - t(matrix(rep(zz, L),length(zz),L))
t <- t + t(A)%*%A
W <- W + t(C)%*%C
}
B <- t-W

Ng <- nrow(score)-g
P <- W/(Ng)
eP <- eigen(P)
ord <- sort.list(eP$values)
V <- sweep(
eP$vectors[,ord,drop=FALSE],
2,
sqrt(colSums(eP$vectors[,ord,drop=FALSE]^2)),
"/")
Dg <- eP$values[ord]
nDg <- length(Dg)
Dmean <- sum(diag(P))/nDg
Dn <- matrix(0,nDg,nDg)
for(i in 1:nDg)
Dn[i,i] <- max(c(Dg[i],Dmean))

Wn <- V%*%Dn%*%t(V)*Ng
ratio <- solve(Wn)%*%B
er <- eigen(ratio)
ev <- Re(er$values)
ev[Im(er$values)>0] <- 0
vec <- Re(er$vectors)
ord <- sort.list(ev,decreasing=TRUE)
vec <- sweep(
vec[,ord,drop=FALSE],
2,
sqrt(colSums(vec[,ord,drop=FALSE]^2)),
"/")
ev <- ev[ord]
maxg <- min(c(g-1,dim(vec)[1]))
vec <- vec[,1:maxg] ## discriminant functions
Tw <- ev[1:maxg]
names(Tw) <- paste("DF", 1:maxg, sep = "")

## get stats here
flip <- function(x) x[rev(seq_along(x))]
n <- dim(d)[1]
st <- matrix(0,length(Tw),3)
st[,1] <- round(Tw,3)
st[,2] <- round(Tw*100/sum(Tw),3)
st[,3] <- round(sqrt(Tw/(1+Tw)),3)
st <- as.data.frame(st)
dimnames(st) <- list(
paste("DF", 1:maxg, sep = ""),
c("Eig","Perceig","Cancor"))

res <- as(x,'LDA')
res@stats <- as_tibble(st)
res@Tw <- Tw
res@rankmat <- rankmat
res@means <- pc$center

loadings <- pc$rotation[,1:rankmat,drop=FALSE] %*% vec
colnames(loadings) <- paste("DF", 1:maxg, sep = "")
x <- sweep(d, 2, res@means) %*% loadings

## group means based on the rotated data
xmeans <- tapply(x, list(rep(cl,ncol(x)),col(x)), mean)
dimnames(xmeans)[[2]] <- colnames(x)

nbmod <- naiveBayes(data.frame(x),cl)
prob <- predict(nbmod,data.frame(x),type="raw")
pred <- apply(prob,1,which.max)
pred <- factor(levels(cl)[pred], levels = levels(cl))

res@loadings <- as_tibble(loadings)
res@x <- as_tibble(x)
res@xmeans <- as_tibble(xmeans)
res@pred <- pred
res@cl <- cl
res@prior <- prior
res@conf <- table(cl,pred)
res@acc <- round(sum(diag(res@conf))*100/nrow(d),2)
res@lev <- levels(cl)
res@call <- match.call()
res@call[[1]] <- as.name("nlda")

return(res)
}
)
22 changes: 6 additions & 16 deletions R/plotLDA.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,24 +63,16 @@ setMethod('plotLDA',
legendPosition = 'bottom',
labelSize = 2){

classLength <- clsLen(analysis,cls)
lda <- nlda(analysis,cls = cls,scale = scale,center = center)

if (classLength < 2) {
stop('More than 1 class needed for PC-LDA.',call. = FALSE)
}

info <- analysis %>%
clsExtract(cls) %>%
factor()

lda <- nlda(dat(analysis),cl = info,scale = scale,center = center)

tw <- lda$Tw %>%
tw <- lda@Tw %>%
round(2)

lda <- lda$x %>%
classLength <- clsLen(lda,cls = cls)

lda <- lda@x %>%
as_tibble() %>%
mutate(!!cls := info)
mutate(!!cls := lda@cl)

if (classLength > 2) {
lda <- lda %>%
Expand All @@ -92,8 +84,6 @@ setMethod('plotLDA',
select(all_of(label)))
}

classLength <- clsLen(analysis,cls)

pl <- scatterPlot(lda,
cls,
xAxis,
Expand Down
Loading

0 comments on commit 2b6fe7b

Please sign in to comment.