Skip to content

Commit

Permalink
start
Browse files Browse the repository at this point in the history
  • Loading branch information
Solatar committed Mar 5, 2021
0 parents commit eb9aeac
Show file tree
Hide file tree
Showing 34 changed files with 598 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
^.*\.Rproj$
^\.Rproj\.user$
9 changes: 9 additions & 0 deletions .Rproj.user/366773DE/pcs/files-pane.pper
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"sortOrder": [
{
"columnIndex": 2,
"ascending": true
}
],
"path": "~/bioinformatics/ggdendroplot"
}
3 changes: 3 additions & 0 deletions .Rproj.user/366773DE/pcs/source-pane.pper
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"activeTab": 1
}
14 changes: 14 additions & 0 deletions .Rproj.user/366773DE/pcs/windowlayoutstate.pper
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"left": {
"splitterpos": 428,
"topwindowstate": "NORMAL",
"panelheight": 1032,
"windowheight": 1070
},
"right": {
"splitterpos": 642,
"topwindowstate": "NORMAL",
"panelheight": 1032,
"windowheight": 1070
}
}
5 changes: 5 additions & 0 deletions .Rproj.user/366773DE/pcs/workbench-pane.pper
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"TabSet1": 0,
"TabSet2": 1,
"TabZoom": {}
}
8 changes: 8 additions & 0 deletions .Rproj.user/366773DE/persistent-state
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
build-last-errors="[]"
build-last-errors-base-dir="~/bioinformatics/ggdendroplot/"
build-last-outputs="[{\"type\":0,\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source ggdendroplot\\n\\n\"},{\"type\":1,\"output\":\"* installing to library ‘/home/nhuber/R/x86_64-redhat-linux-gnu-library/3.6’\\n\"},{\"type\":1,\"output\":\"* installing *source* package ‘ggdendroplot’ ...\\n\"},{\"type\":1,\"output\":\"** using staged installation\\n\"},{\"type\":1,\"output\":\"** R\\n\"},{\"type\":1,\"output\":\"** byte-compile and prepare package for lazy loading\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\" converting help for package ‘ggdendroplot’\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** help\\n\"},{\"type\":1,\"output\":\"*** installing help indices\\n\"},{\"type\":1,\"output\":\" finding HTML links ...\"},{\"type\":1,\"output\":\" geom_dendro html \\n\"},{\"type\":1,\"output\":\" hello html \\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\" done\\n\"},{\"type\":1,\"output\":\"** building package indices\\n\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from temporary location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from final location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\"},{\"type\":1,\"output\":\"* DONE (ggdendroplot)\\n\"},{\"type\":1,\"output\":\"\"}]"
compile_pdf_state="{\"tab_visible\":false,\"running\":false,\"target_file\":\"\",\"output\":\"\",\"errors\":[]}"
files.monitored-path=""
find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":false,\"ignoreCase\":false,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOn\":[],\"matchOff\":[],\"replaceMatchOn\":[],\"replaceMatchOff\":[]},\"running\":false,\"replace\":false,\"preview\":false,\"gitFlag\":false,\"replacePattern\":\"\"}"
imageDirtyState="1"
saveActionState="-1"
5 changes: 5 additions & 0 deletions .Rproj.user/366773DE/rmd-outputs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@





1 change: 1 addition & 0 deletions .Rproj.user/366773DE/saved_source_markers
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"active_set":"","sets":[]}
5 changes: 5 additions & 0 deletions .Rproj.user/366773DE/sources/prop/35DB2880
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"tempName": "Untitled1",
"cursorPosition": "0,2",
"scrollLine": "0"
}
4 changes: 4 additions & 0 deletions .Rproj.user/366773DE/sources/prop/B307ED6A
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"cursorPosition": "2,0",
"scrollLine": "0"
}
1 change: 1 addition & 0 deletions .Rproj.user/366773DE/sources/prop/CFE6B31E
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
3 changes: 3 additions & 0 deletions .Rproj.user/366773DE/sources/prop/INDEX
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
~%2FDesktop%2Fgghierarchy.R="B307ED6A"
~%2Fbioinformatics%2Fggdendroplot%2FR%2Fgeom_dendro.R="35DB2880"
~%2Fbioinformatics%2Fggdendroplot%2FR%2Fhello.R="CFE6B31E"
18 changes: 18 additions & 0 deletions .Rproj.user/366773DE/sources/s-9810FC5B/52DBD5A1-contents
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Hello, world!
#
# This is an example function named 'hello'
# which prints 'Hello, world!'.
#
# You can learn more about package authoring with RStudio at:
#
# http://r-pkgs.had.co.nz/
#
# Some useful keyboard shortcuts for package authoring:
#
# Install Package: 'Ctrl + Shift + B'
# Check Package: 'Ctrl + Shift + E'
# Test Package: 'Ctrl + Shift + T'

# Print the classic greeting; the printed string is also returned invisibly.
hello <- function() {
  greeting <- "Hello, world!"
  print(greeting)
}
Empty file.
104 changes: 104 additions & 0 deletions .Rproj.user/366773DE/sources/s-9810FC5B/98169FE9-contents
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Scratch session: cluster a random matrix and compare the ggdendro plot with
# geom_dendro().
# NOTE(review): seeqR::main() is not defined in this file; presumably it sets
# up the session (e.g. attaches ggplot2) - confirm.
seeqR::main()
# 8 x 13 matrix; the element count now matches the shape (100 values did not
# fill 8 rows evenly and triggered a recycling warning).
x <- matrix(rnorm(8 * 13), nrow = 8)
rownames(x) <- paste0("a", seq_len(nrow(x)))
a <- dist(x)
clust <- hclust(a, method = "complete")
ggdendro::ggdendrogram(clust)
ggplot() + geom_dendro(clust)
# NOTE(review): this call passes the raw matrix, while the geom_dendro()
# defined further down in this buffer expects an hclust object as its first
# argument - confirm which version of the function is intended here.
ggplot() + geom_dendro(x, axis.labels = TRUE, pointing = "side", ylim = c(3, 12))


# Build a dendrogram from an hclust result as a list of ggplot2 layers.
#
# Arguments:
#   clust        result of hclust(); its merge, height, order and labels
#                components are used.
#   xlim, ylim   optional length-2 numeric vectors; the dendrogram is rescaled
#                so that it starts at the first value and ends at the second.
#                NULL keeps the native coordinates.
#   pointing     "updown" (default) or "side"; "side" swaps the x and y roles.
#   show.labels  if TRUE, a text layer with the leaf labels is appended.
#   axis.labels  accepted for compatibility but not used by this version.
#   ...          parameters forwarded to the layers (e.g. color, size).
#
# Returns a list with one path layer per merge, followed by the text layer
# when show.labels is TRUE.
geom_dendro <- function(clust, xlim = NULL, ylim = NULL, pointing = "updown",
                        show.labels = TRUE, axis.labels = TRUE, ...) {
  ranks <- clust$order
  samples <- clust$labels[ranks]
  # hclust() leaves labels NULL for unnamed input; fall back to the indices
  if (is.null(samples)) {
    samples <- as.character(ranks)
  }
  dflabel <- data.frame(label = samples, x = seq_along(samples), y = 0)

  # leaf_pos[k] is the plotting position of observation k (inverse of order)
  leaf_pos <- match(seq_along(ranks), ranks)

  #========================
  # one arch per merge: (x1, y1) and (x2, y2) are the feet, heights the top
  merges <- clust$merge
  heights <- clust$height
  n_arch <- nrow(merges)
  x1 <- x2 <- y1 <- y2 <- numeric(n_arch)
  # must be sequential: an arch hanging on an earlier cluster starts at the
  # midpoint and height of that cluster's arch
  for (i in seq_len(n_arch)) {
    left <- merges[i, 1]
    right <- merges[i, 2]
    if (left < 0) {
      x1[i] <- leaf_pos[-left]
    } else {
      x1[i] <- mean(c(x1[left], x2[left]))
      y1[i] <- heights[left]
    }
    if (right < 0) {
      x2[i] <- leaf_pos[-right]
    } else {
      x2[i] <- mean(c(x1[right], x2[right]))
      y2[i] <- heights[right]
    }
  }

  #========================
  # four path points per arch; z records which arch a point belongs to
  df3 <- data.frame(
    x = as.vector(rbind(x1, x1, x2, x2)),
    y = as.vector(rbind(y1, heights, heights, y2)),
    z = rep(seq_len(n_arch), each = 4L)
  )
  if (pointing %in% "side") {
    colnames(df3) <- c("y", "x", "z")
    colnames(dflabel) <- c("label", "y", "x")
  }

  extremes <- c(range(df3$x), range(df3$y))
  if (is.null(xlim)) xlim <- extremes[1:2]
  if (is.null(ylim)) ylim <- extremes[3:4]

  # map v from the interval `from` onto `to` (via the unit interval)
  rescale <- function(v, from, to) {
    span <- abs(from[2] - from[1])
    if (span == 0) span <- 1 # degenerate range: avoid 0 / 0
    (to[2] - to[1]) * ((v - from[1]) / span) + to[1]
  }
  df5 <- df3
  df5$x <- rescale(df3$x, extremes[1:2], xlim)
  df5$y <- rescale(df3$y, extremes[3:4], ylim)
  dflabel$x <- rescale(dflabel$x, extremes[1:2], xlim)
  dflabel$y <- rescale(dflabel$y, extremes[3:4], ylim)

  # `...` here refers to the arguments given to geom_dendro() itself
  path_layers <- lapply(split(df5, df5$z), function(arch) {
    ggplot2::layer(
      data = arch,
      mapping = ggplot2::aes(x = x, y = y),
      geom = "path",
      stat = "identity",
      position = "identity",
      show.legend = FALSE,
      params = list(...)
    )
  })

  if (!show.labels) {
    return(path_layers)
  }

  label_layer <- ggplot2::layer(
    data = dflabel,
    mapping = ggplot2::aes(x = x, y = y, label = label),
    geom = "text",
    stat = "identity",
    position = "identity",
    show.legend = FALSE,
    params = list(...)
  )
  # justify the labels so that they sit outside the tree, whichever way the
  # limits run
  if (pointing %in% "updown" && ylim[1] < ylim[2]) {
    label_layer$aes_params[["vjust"]] <- 1
  } else if (pointing %in% "updown" && ylim[1] > ylim[2]) {
    label_layer$aes_params[["vjust"]] <- 0
  } else if (pointing %in% "side" && xlim[1] < xlim[2]) {
    label_layer$aes_params[["hjust"]] <- 1
  } else if (pointing %in% "side" && xlim[1] > xlim[2]) {
    label_layer$aes_params[["hjust"]] <- 0
  }

  c(path_layers, list(label_layer))
}

Empty file.
25 changes: 25 additions & 0 deletions .Rproj.user/366773DE/sources/s-9810FC5B/9C084B7F
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"id": "9C084B7F",
"path": "~/bioinformatics/ggdendroplot/R/geom_dendro.R",
"project_path": "R/geom_dendro.R",
"type": "r_source",
"hash": "0",
"contents": "",
"dirty": false,
"created": 1614929393099.0,
"source_on_save": false,
"relative_order": 2,
"properties": {
"tempName": "Untitled1",
"cursorPosition": "0,2",
"scrollLine": "0"
},
"folds": "",
"lastKnownWriteTime": 1614935809,
"encoding": "UTF-8",
"collab_server": "",
"source_window": "",
"last_content_update": 1614935809263,
"read_only": false,
"read_only_alternatives": []
}
131 changes: 131 additions & 0 deletions .Rproj.user/366773DE/sources/s-9810FC5B/9C084B7F-contents
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# devtools::document()

#' Create a dendrogram as several geom_path layers
#'
#' Clusters the columns of `data` hierarchically and returns the dendrogram as
#' a list of layers that is ready to be added to a ggplot object.
#'
#' @inheritParams ggplot2::geom_path
#' @import ggplot2
#'
#' @param data matrix or data.frame, columns will be clustered based on their rows.
#' @param xlim vector with 2 numbers, on the x axis the dendrogram will begin at the first number and end at the second.
#' @param ylim vector with 2 numbers, on the y axis the dendrogram will begin at the first number and end at the second.
#' @param pointing string, either "side" or "updown" (default) to indicate where the dendrogram should point.
#' @param clustmethod string, method to be used by the hclust function (check ?hclust for available methods).
#' @param axis.labels boolean, whether or not the axis should show the column names of data. This adds a continuous scale to the output to define the axis labels.
#' @param ... further arguments passed on to the path layers, such as color, size, etc.
#' @return a list of several ggplot2 layer objects (geom_path for the dendrogram), followed by an axis scale if `axis.labels` is TRUE, that can directly be added to a ggplot2 object
#' @details the function uses geom_path for the dendrogram, so ... takes all arguments that geom_path would also take, such as color, size, etc.
#' @export
#' @examples
#' library(ggdendroplot)
#' library(ggplot2)
#'
#' df <- matrix(rnorm(128), ncol = 8)
#' colnames(df) <- paste0("a",seq(ncol(df)))
#'
#' ggplot() + geom_dendro(data=df)
#' ggplot() + geom_dendro(data=df, pointing="side")
#' ggplot() + geom_dendro(data=df, xlim=c(3,0))
#' ggplot() + geom_dendro(data=df, ylim=c(3,0))
#' ggplot() + geom_dendro(data=df, size=2, color="blue", linetype="dashed")
#' ggplot() + geom_dendro(data=df, size=4, lineend="round")
#'
geom_dendro <- function(data, xlim = NULL, ylim = NULL, pointing = "updown",
                        clustmethod = "complete", axis.labels = TRUE, ...) {
  calc <- .plotcalculation(
    data = data, xlim = xlim, ylim = ylim,
    pointing = pointing, clustmethod = clustmethod
  )

  # one geom_path layer per arch of the dendrogram; `...` refers to the
  # arguments given to geom_dendro() itself
  output <- lapply(calc[["dendro"]], function(arch) {
    ggplot2::layer(
      data = arch,
      mapping = ggplot2::aes(x = x, y = y),
      geom = "path",
      stat = "identity",
      position = "identity",
      show.legend = FALSE,
      params = list(...)
    )
  })

  if (axis.labels) {
    # put the leaf labels on the axis that the leaves are spread along
    plotlabels <- calc[["plotlabels"]]
    axis_scale <- switch(pointing,
      "updown" = ggplot2::scale_x_continuous(
        breaks = plotlabels$x, labels = plotlabels$label
      ),
      "side" = ggplot2::scale_y_continuous(
        breaks = plotlabels$y, labels = plotlabels$label
      )
    )
    # switch() yields NULL for any other value of pointing: add no scale then
    if (!is.null(axis_scale)) {
      output <- c(output, list(axis_scale))
    }
  }

  output
}

# Cluster the columns of `data` and compute the dendrogram coordinates.
#
# Arguments:
#   data         matrix or data.frame; its columns are clustered with
#                hclust() on dist(t(data)).
#   xlim, ylim   NULL or length-2 numeric vectors; the coordinates are rescaled
#                to start at the first value and end at the second. NULL keeps
#                the native coordinates.
#   pointing     "side" swaps the x and y roles; any other value leaves the
#                leaves spread along x.
#   clustmethod  agglomeration method handed to hclust().
#
# Returns a list with
#   dendro         list of data.frames (x, y, z), one per arch, four path
#                  points each; z is the arch index
#   plotlabels     data.frame with the label and position of every leaf
#   labelsInOrder  the labels ordered by their x, then y position
.plotcalculation <- function(data, xlim, ylim, pointing, clustmethod) {
  clust <- stats::hclust(stats::dist(t(data)), method = clustmethod)

  ranks <- clust$order
  samples <- clust$labels[ranks]
  # hclust() leaves labels NULL for unnamed input; fall back to the indices
  if (is.null(samples)) {
    samples <- as.character(ranks)
  }
  dflabel <- data.frame(label = samples, x = seq_along(samples), y = 0)

  # leaf_pos[k] is the plotting position of observation k (inverse of order)
  leaf_pos <- match(seq_along(ranks), ranks)

  #========================
  # one arch per merge: (x1, y1) and (x2, y2) are the feet, heights the top
  merges <- clust$merge
  heights <- clust$height
  n_arch <- nrow(merges)
  x1 <- x2 <- y1 <- y2 <- numeric(n_arch)
  # must be sequential: an arch hanging on an earlier cluster starts at the
  # midpoint and height of that cluster's arch
  for (i in seq_len(n_arch)) {
    left <- merges[i, 1]
    right <- merges[i, 2]
    if (left < 0) {
      x1[i] <- leaf_pos[-left]
    } else {
      x1[i] <- mean(c(x1[left], x2[left]))
      y1[i] <- heights[left]
    }
    if (right < 0) {
      x2[i] <- leaf_pos[-right]
    } else {
      x2[i] <- mean(c(x1[right], x2[right]))
      y2[i] <- heights[right]
    }
  }

  #========================
  # four path points per arch; z records which arch a point belongs to
  df3 <- data.frame(
    x = as.vector(rbind(x1, x1, x2, x2)),
    y = as.vector(rbind(y1, heights, heights, y2)),
    z = rep(seq_len(n_arch), each = 4L)
  )
  if (pointing %in% "side") {
    colnames(df3) <- c("y", "x", "z")
    colnames(dflabel) <- c("label", "y", "x")
  }

  extremes <- c(range(df3$x), range(df3$y))
  if (is.null(xlim)) xlim <- extremes[1:2]
  if (is.null(ylim)) ylim <- extremes[3:4]

  # map v from the interval `from` onto `to` (via the unit interval)
  rescale <- function(v, from, to) {
    span <- abs(from[2] - from[1])
    if (span == 0) span <- 1 # degenerate range: avoid 0 / 0
    (to[2] - to[1]) * ((v - from[1]) / span) + to[1]
  }
  df5 <- df3
  df5$x <- rescale(df3$x, extremes[1:2], xlim)
  df5$y <- rescale(df3$y, extremes[3:4], ylim)
  dflabel$x <- rescale(dflabel$x, extremes[1:2], xlim)
  dflabel$y <- rescale(dflabel$y, extremes[3:4], ylim)

  labelsInOrder <- dflabel$label[order(dflabel$x, dflabel$y)]

  list(
    dendro = split(df5, df5$z),
    plotlabels = dflabel,
    labelsInOrder = labelsInOrder
  )
}
Empty file.
Empty file.
3 changes: 3 additions & 0 deletions .Rproj.user/shared/notebooks/paths
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/home/nhuber/Desktop/gghierarchy.R="1ACF579E"
/home/nhuber/bioinformatics/ggdendroplot/R/geom_dendro.R="E1598E29"
/home/nhuber/bioinformatics/ggdendroplot/R/hello.R="7AD6E14A"
13 changes: 13 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Package: ggdendroplot
Type: Package
Title: Create dendrograms for ggplot
Depends: R, stats
Imports: ggplot2
Version: 0.1.0
Author: Nicolas Huber
Maintainer: Nicolas Huber <info.huber@aol.de>
Description: Create a dendrogram as a layer, ready to be added to ggplot.
License: What license is it under?
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.0
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Generated by roxygen2: do not edit by hand

export(geom_dendro)
import(ggplot2)
Loading

0 comments on commit eb9aeac

Please sign in to comment.